class LwopDecoderFeaturesBend(nn.Layer):
    """
    Lightweight OpenPose 2D/3D specific decoder 3D features bend.

    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    mid_channels : int
        Number of middle channels.
    out_channels : int
        Number of output channels.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    def __init__(self,
                 in_channels,
                 mid_channels,
                 out_channels,
                 data_format="channels_last",
                 **kwargs):
        super(LwopDecoderFeaturesBend, self).__init__(**kwargs)
        self.body = SimpleSequential(name="body")
        for i in range(2):
            self.body.add(
                LwopRefinementBlock(in_channels=in_channels,
                                    out_channels=mid_channels,
                                    data_format=data_format,
                                    name="block{}".format(i + 1)))
            in_channels = mid_channels
        self.features_bend = LwopDecoderBend(in_channels=mid_channels,
                                             mid_channels=mid_channels,
                                             out_channels=out_channels,
                                             data_format=data_format,
                                             name="features_bend")

    def call(self, x, training=None):
        x = self.body(x, training=training)
        x = self.features_bend(x, training=training)
        return x
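
A minimal usage sketch, assuming LwopRefinementBlock and LwopDecoderBend from the same module are available and that tensorflow is imported as tf; the channel counts and input shape are illustrative, not values from the original model:

# Hedged usage sketch (illustrative channel counts, NHWC input assumed).
bend = LwopDecoderFeaturesBend(in_channels=128,
                               mid_channels=128,
                               out_channels=57,
                               data_format="channels_last")
x = tf.random.normal((1, 32, 32, 128))  # dummy feature map
y = bend(x, training=False)             # -> (1, 32, 32, 57)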
Example #3
    def __init__(self,
                 in_channels,
                 out_channels,
                 strides,
                 reps,
                 start_with_relu=True,
                 grow_first=True,
                 data_format="channels_last",
                 **kwargs):
        super(XceptionUnit, self).__init__(**kwargs)
        self.resize_identity = (in_channels != out_channels) or (strides != 1)

        if self.resize_identity:
            self.identity_conv = conv1x1_block(in_channels=in_channels,
                                               out_channels=out_channels,
                                               strides=strides,
                                               activation=None,
                                               data_format=data_format,
                                               name="identity_conv")

        self.body = SimpleSequential(name="body")
        for i in range(reps):
            if (grow_first and (i == 0)) or ((not grow_first) and
                                             (i == reps - 1)):
                in_channels_i = in_channels
                out_channels_i = out_channels
            else:
                if grow_first:
                    in_channels_i = out_channels
                    out_channels_i = out_channels
                else:
                    in_channels_i = in_channels
                    out_channels_i = in_channels
            activate = start_with_relu if (i == 0) else True
            self.body.children.append(
                dws_conv3x3_block(in_channels=in_channels_i,
                                  out_channels=out_channels_i,
                                  activate=activate,
                                  data_format=data_format,
                                  name="block{}".format(i + 1)))
        if strides != 1:
            self.body.children.append(
                MaxPool2d(pool_size=3,
                          strides=strides,
                          padding=1,
                          data_format=data_format,
                          name="pool"))
Example #5
    def __init__(self,
                 in_channels,
                 out_channels,
                 mid_channels_list,
                 kernel_size_list,
                 strides_list,
                 padding_list,
                 data_format="channels_last",
                 **kwargs):
        super(ConvSeq3x3Branch, self).__init__(**kwargs)
        self.data_format = data_format

        self.conv_list = SimpleSequential(name="conv_list")
        for i, (mid_channels, kernel_size, strides, padding) in enumerate(
                zip(mid_channels_list, kernel_size_list, strides_list,
                    padding_list)):
            self.conv_list.children.append(
                InceptConv(in_channels=in_channels,
                           out_channels=mid_channels,
                           kernel_size=kernel_size,
                           strides=strides,
                           padding=padding,
                           data_format=data_format,
                           name="conv{}".format(i + 1)))
            in_channels = mid_channels
        self.conv1x3 = InceptConv(in_channels=in_channels,
                                  out_channels=out_channels,
                                  kernel_size=(1, 3),
                                  strides=1,
                                  padding=(0, 1),
                                  data_format=data_format,
                                  name="conv1x3")
        self.conv3x1 = InceptConv(in_channels=in_channels,
                                  out_channels=out_channels,
                                  kernel_size=(3, 1),
                                  strides=1,
                                  padding=(1, 0),
                                  data_format=data_format,
                                  name="conv3x1")
Example #6
class SBStage(nn.Layer):
    """
    SB stage.

    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    down_channels : int
        Number of output channels for a downscale block.
    channels_list : list of int
        Number of output channels for all residual blocks.
    kernel_sizes_list : list of int
        Convolution window size for branches.
    scale_factors_list : list of int
        Scale factor for branches.
    use_residual_list : list of int
        List of flags for using residual in each ESP-block.
    se_reduction : int
        Squeeze-and-excitation reduction value (0 means no SE block).
    in_size : tuple of 2 int
        Spatial size of the output tensor for the bilinear upsampling operation.
    bn_eps : float
        Small float added to variance in Batch norm.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    def __init__(self,
                 in_channels,
                 down_channels,
                 channels_list,
                 kernel_sizes_list,
                 scale_factors_list,
                 use_residual_list,
                 se_reduction,
                 in_size,
                 bn_eps,
                 data_format="channels_last",
                 **kwargs):
        super(SBStage, self).__init__(**kwargs)
        self.data_format = data_format

        self.down_conv = dwsconv3x3_block(
            in_channels=in_channels,
            out_channels=down_channels,
            strides=2,
            dw_use_bn=False,
            bn_eps=bn_eps,
            dw_activation=None,
            pw_activation=(lambda: PReLU2(
                down_channels, data_format=data_format, name="activ")),
            se_reduction=se_reduction,
            data_format=data_format,
            name="down_conv")
        in_channels = down_channels

        self.main_branch = SimpleSequential(name="main_branch")
        for i, out_channels in enumerate(channels_list):
            use_residual = (use_residual_list[i] == 1)
            kernel_sizes = kernel_sizes_list[i]
            scale_factors = scale_factors_list[i]
            self.main_branch.add(
                ESPBlock(in_channels=in_channels,
                         out_channels=out_channels,
                         kernel_sizes=kernel_sizes,
                         scale_factors=scale_factors,
                         use_residual=use_residual,
                         in_size=((in_size[0] // 2,
                                   in_size[1] // 2) if in_size else None),
                         bn_eps=bn_eps,
                         data_format=data_format,
                         name="block{}".format(i + 1)))
            in_channels = out_channels

        self.preactiv = PreActivation(in_channels=(down_channels +
                                                   in_channels),
                                      bn_eps=bn_eps,
                                      data_format=data_format,
                                      name="preactiv")

    def call(self, x, training=None):
        x = self.down_conv(x, training=training)
        y = self.main_branch(x, training=training)
        x = tf.concat([x, y], axis=get_channel_axis(self.data_format))
        x = self.preactiv(x, training=training)
        return x, y
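
SBStage.call concatenates along the channel axis via get_channel_axis; a minimal re-implementation sketch of that helper, assuming it mirrors the usual data_format convention:

def get_channel_axis(data_format):
    # 'channels_first' (NCHW) -> axis 1; 'channels_last' (NHWC) -> axis -1
    return 1 if data_format == "channels_first" else -1

assert get_channel_axis("channels_last") == -1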
Example #7
    def __init__(self,
                 encoder_channels,
                 encoder_paddings,
                 encoder_init_block_channels,
                 encoder_final_block_channels,
                 refinement_units,
                 calc_3d_features,
                 return_heatmap=True,
                 in_channels=3,
                 in_size=(368, 368),
                 keypoints=19,
                 data_format="channels_last",
                 **kwargs):
        super(LwOpenPose, self).__init__(**kwargs)
        assert (in_channels == 3)
        self.in_size = in_size
        self.keypoints = keypoints
        self.data_format = data_format
        self.return_heatmap = return_heatmap
        self.calc_3d_features = calc_3d_features
        num_heatmap_paf = 3 * keypoints

        self.encoder = tf.keras.Sequential(name="encoder")
        backbone = SimpleSequential(name="backbone")
        backbone.add(
            conv3x3_block(in_channels=in_channels,
                          out_channels=encoder_init_block_channels,
                          strides=2,
                          data_format=data_format,
                          name="init_block"))
        in_channels = encoder_init_block_channels
        for i, channels_per_stage in enumerate(encoder_channels):
            stage = SimpleSequential(name="stage{}".format(i + 1))
            for j, out_channels in enumerate(channels_per_stage):
                strides = 2 if (j == 0) and (i != 0) else 1
                padding = encoder_paddings[i][j]
                stage.add(
                    dwsconv3x3_block(in_channels=in_channels,
                                     out_channels=out_channels,
                                     strides=strides,
                                     padding=padding,
                                     dilation=padding,
                                     data_format=data_format,
                                     name="unit{}".format(j + 1)))
                in_channels = out_channels
            backbone.add(stage)
        self.encoder.add(backbone)
        self.encoder.add(
            LwopEncoderFinalBlock(in_channels=in_channels,
                                  out_channels=encoder_final_block_channels,
                                  data_format=data_format,
                                  name="final_block"))
        in_channels = encoder_final_block_channels

        self.decoder = tf.keras.Sequential(name="decoder")
        self.decoder.add(
            LwopDecoderInitBlock(in_channels=in_channels,
                                 keypoints=keypoints,
                                 data_format=data_format,
                                 name="init_block"))
        in_channels = encoder_final_block_channels + num_heatmap_paf
        for i in range(refinement_units):
            self.decoder.add(
                LwopDecoderUnit(in_channels=in_channels,
                                keypoints=keypoints,
                                data_format=data_format,
                                name="unit{}".format(i + 1)))
        self.decoder.add(
            LwopDecoderFinalBlock(in_channels=in_channels,
                                  keypoints=keypoints,
                                  bottleneck_factor=2,
                                  calc_3d_features=calc_3d_features,
                                  data_format=data_format,
                                  name="final_block"))
Example #8
class IbpPose(tf.keras.Model):
    """
    IBPPose model from 'Simple Pose: Rethinking and Improving a Bottom-up Approach for Multi-Person Pose Estimation,'
    https://arxiv.org/abs/1911.10529.

    Parameters:
    ----------
    passes : int
        Number of passes.
    backbone_out_channels : int
        Number of output channels for the backbone.
    outs_channels : int
        Number of output channels for the network outputs.
    depth : int
        Depth of hourglass.
    growth_rate : int
        Increment to the number of channels for each hourglass level.
    use_bn : bool
        Whether to use BatchNorm layer.
    in_channels : int, default 3
        Number of input channels.
    in_size : tuple of two ints, default (256, 256)
        Spatial size of the expected input image.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    def __init__(self,
                 passes,
                 backbone_out_channels,
                 outs_channels,
                 depth,
                 growth_rate,
                 use_bn,
                 in_channels=3,
                 in_size=(256, 256),
                 data_format="channels_last",
                 **kwargs):
        super(IbpPose, self).__init__(**kwargs)
        self.in_size = in_size
        self.data_format = data_format
        activation = nn.LeakyReLU(alpha=0.01)

        self.backbone = IbpBackbone(in_channels=in_channels,
                                    out_channels=backbone_out_channels,
                                    activation=activation,
                                    data_format=data_format,
                                    name="backbone")

        self.decoder = SimpleSequential(name="decoder")
        for i in range(passes):
            merge = (i != passes - 1)
            self.decoder.add(
                IbpPass(channels=backbone_out_channels,
                        mid_channels=outs_channels,
                        depth=depth,
                        growth_rate=growth_rate,
                        merge=merge,
                        use_bn=use_bn,
                        activation=activation,
                        data_format=data_format,
                        name="pass{}".format(i + 1)))

    def call(self, x, training=None):
        x = self.backbone(x, training=training)
        x_prev = None
        for block in self.decoder.children:
            if x_prev is not None:
                x = x + x_prev
            x_prev = block(x, x_prev, training=training)
        return x_prev
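
The call loop threads each pass's output back into the next pass's input; a minimal numeric analogue of that recurrence (the lambdas stand in for IbpPass blocks):

blocks = [lambda x, prev: x * 0.5, lambda x, prev: x * 0.5, lambda x, prev: x * 0.5]
x, x_prev = 8.0, None
for block in blocks:
    if x_prev is not None:
        x = x + x_prev          # residual feed of the previous pass's output
    x_prev = block(x, x_prev)
print(x_prev)                   # 9.0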
Example #9
    def __init__(self,
                 channels,
                 mid_channels,
                 depth,
                 growth_rate,
                 merge,
                 use_bn,
                 activation,
                 data_format="channels_last",
                 **kwargs):
        super(IbpPass, self).__init__(**kwargs)
        self.merge = merge

        down_seq = SimpleSequential(name="down_seq")
        up_seq = SimpleSequential(name="up_seq")
        skip_seq = SimpleSequential(name="skip_seq")
        top_channels = channels
        bottom_channels = channels
        for i in range(depth + 1):
            skip_seq.add(
                IbpResUnit(in_channels=top_channels,
                           out_channels=top_channels,
                           activation=activation,
                           data_format=data_format,
                           name="skip{}".format(i + 1)))
            bottom_channels += growth_rate
            if i < depth:
                down_seq.add(
                    IbpDownBlock(in_channels=top_channels,
                                 out_channels=bottom_channels,
                                 activation=activation,
                                 data_format=data_format,
                                 name="down{}".format(i + 1)))
                up_seq.add(
                    IbpUpBlock(in_channels=bottom_channels,
                               out_channels=top_channels,
                               use_bn=use_bn,
                               activation=activation,
                               data_format=data_format,
                               name="up{}".format(i + 1)))
            top_channels = bottom_channels
        self.hg = Hourglass(down_seq=down_seq,
                            up_seq=up_seq,
                            skip_seq=skip_seq,
                            name="hg")

        self.pre_block = IbpPreBlock(out_channels=channels,
                                     use_bn=use_bn,
                                     activation=activation,
                                     data_format=data_format,
                                     name="pre_block")
        self.post_block = conv1x1_block(in_channels=channels,
                                        out_channels=mid_channels,
                                        use_bias=True,
                                        use_bn=False,
                                        activation=None,
                                        data_format=data_format,
                                        name="post_block")

        if self.merge:
            self.pre_merge_block = MergeBlock(in_channels=channels,
                                              out_channels=channels,
                                              use_bn=use_bn,
                                              data_format=data_format,
                                              name="pre_merge_block")
            self.post_merge_block = MergeBlock(in_channels=mid_channels,
                                               out_channels=channels,
                                               use_bn=use_bn,
                                               data_format=data_format,
                                               name="post_merge_block")
Example #11
class NvpAttExpEncoder(nn.Layer):
    """
    Neural Voice Puppetry Audio-to-Expression encoder.

    Parameters:
    ----------
    audio_features : int
        Number of audio features (characters/sounds).
    audio_window_size : int
        Size of audio window (for time related audio features).
    seq_len : int
        Size of feature window.
    encoder_features : int
        Number of encoder features.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    def __init__(self,
                 audio_features,
                 audio_window_size,
                 seq_len,
                 encoder_features,
                 data_format="channels_last",
                 **kwargs):
        super(NvpAttExpEncoder, self).__init__(**kwargs)
        self.audio_features = audio_features
        self.audio_window_size = audio_window_size
        self.seq_len = seq_len
        self.data_format = data_format
        conv_channels = (32, 32, 64, 64)
        conv_slopes = (0.02, 0.02, 0.2, 0.2)
        fc_channels = (128, 64, encoder_features)
        fc_slopes = (0.02, 0.02, None)
        att_conv_channels = (16, 8, 4, 2, 1)
        att_conv_slopes = 0.02

        in_channels = audio_features
        self.conv_branch = SimpleSequential(name="conv_branch")
        for i, (out_channels,
                slope) in enumerate(zip(conv_channels, conv_slopes)):
            self.conv_branch.add(
                ConvBlock(in_channels=in_channels,
                          out_channels=out_channels,
                          kernel_size=(3, 1),
                          strides=(2, 1),
                          padding=(1, 0),
                          use_bias=True,
                          use_bn=False,
                          activation=nn.LeakyReLU(alpha=slope),
                          data_format=data_format,
                          name="conv{}".format(i + 1)))
            in_channels = out_channels

        self.fc_branch = SimpleSequential(name="fc_branch")
        for i, (out_channels, slope) in enumerate(zip(fc_channels, fc_slopes)):
            activation = nn.LeakyReLU(
                alpha=slope) if slope is not None else "tanh"
            self.fc_branch.add(
                DenseBlock(in_channels=in_channels,
                           out_channels=out_channels,
                           use_bias=True,
                           use_bn=False,
                           activation=activation,
                           data_format=data_format,
                           name="fc{}".format(i + 1)))
            in_channels = out_channels

        self.att_conv_branch = SimpleSequential(name="att_conv_branch")
        for i, out_channels in enumerate(att_conv_channels):
            self.att_conv_branch.add(
                ConvBlock1d(in_channels=in_channels,
                            out_channels=out_channels,
                            kernel_size=3,
                            strides=1,
                            padding=1,
                            use_bias=True,
                            use_bn=False,
                            activation=nn.LeakyReLU(alpha=att_conv_slopes),
                            data_format=data_format,
                            name="att_conv{}".format(i + 1)))
            in_channels = out_channels

        self.att_fc = DenseBlock(in_channels=seq_len,
                                 out_channels=seq_len,
                                 use_bias=True,
                                 use_bn=False,
                                 activation=nn.Softmax(axis=1),
                                 data_format=data_format,
                                 name="att_fc")

    def call(self, x, training=None):
        batch = x.shape[0]
        batch_seq_len = batch * self.seq_len

        if is_channels_first(self.data_format):
            x = tf.reshape(x,
                           shape=(-1, 1, self.audio_window_size,
                                  self.audio_features))
            x = tf.transpose(x, perm=(0, 3, 2, 1))
            x = self.conv_branch(x)
            x = tf.squeeze(x, axis=-1)
            x = tf.reshape(x, shape=(batch_seq_len, 1, -1))
            x = self.fc_branch(x)
            x = tf.reshape(x, shape=(batch, self.seq_len, -1))
            x = tf.transpose(x, perm=(0, 2, 1))

            y = x[:, :, (self.seq_len // 2)]

            w = self.att_conv_branch(x)
            w = tf.squeeze(w, axis=1)
            w = self.att_fc(w)
            w = tf.expand_dims(w, axis=-1)
        else:
            x = tf.transpose(x, perm=(0, 3, 1, 2))
            x = tf.reshape(x,
                           shape=(-1, 1, self.audio_window_size,
                                  self.audio_features))
            x = tf.transpose(x, perm=(0, 2, 3, 1))
            x = tf.transpose(x, perm=(0, 1, 3, 2))
            x = self.conv_branch(x)
            x = tf.squeeze(x, axis=1)
            x = self.fc_branch(x)
            x = tf.reshape(x, shape=(batch, self.seq_len, -1))

            y = x[:, (self.seq_len // 2), :]

            w = self.att_conv_branch(x)
            w = tf.squeeze(w, axis=-1)
            w = self.att_fc(w)
            w = tf.expand_dims(w, axis=-1)
            x = tf.transpose(x, perm=(0, 2, 1))

        x = tf.keras.backend.batch_dot(x, w)
        x = tf.squeeze(x, axis=-1)

        return x, y
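
In the conv branch, four stride-(2, 1) convolutions with kernel (3, 1) and padding (1, 0) halve the audio_window_size dimension each time; a quick check of the temporal collapse (audio_window_size=16 is an assumed value):

t = 16                      # assumed audio_window_size
for _ in range(4):          # the four ConvBlock layers in conv_branch
    t = (t + 1) // 2        # stride 2 with padding 1 on a kernel of 3
print(t)                    # 1, so squeezing that axis afterwards is valid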
Example #13
class HRBlock(nn.Layer):
    """
    HRNet block.

    Parameters:
    ----------
    in_channels_list : list of int
        Number of input channels.
    out_channels_list : list of int
        Number of output channels.
    num_branches : int
        Number of branches.
    num_subblocks : list of int
        Number of subblocks.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    def __init__(self,
                 in_channels_list,
                 out_channels_list,
                 num_branches,
                 num_subblocks,
                 data_format="channels_last",
                 **kwargs):
        super(HRBlock, self).__init__(**kwargs)
        self.in_channels_list = in_channels_list
        self.num_branches = num_branches

        self.branches = SimpleSequential(name="branches")
        for i in range(num_branches):
            layers = SimpleSequential(name="branches/branch{}".format(i + 1))
            in_channels_i = self.in_channels_list[i]
            out_channels_i = out_channels_list[i]
            for j in range(num_subblocks[i]):
                layers.add(ResUnit(
                    in_channels=in_channels_i,
                    out_channels=out_channels_i,
                    strides=1,
                    bottleneck=False,
                    data_format=data_format,
                    name="unit{}".format(j + 1)))
                in_channels_i = out_channels_i
            self.in_channels_list[i] = out_channels_i
            self.branches.add(layers)

        if num_branches > 1:
            self.fuse_layers = SimpleSequential(name="fuse_layers")
            for i in range(num_branches):
                fuse_layer_name = "fuse_layers/fuse_layer{}".format(i + 1)
                fuse_layer = SimpleSequential(name=fuse_layer_name)
                for j in range(num_branches):
                    if j > i:
                        fuse_layer.add(UpSamplingBlock(
                            in_channels=in_channels_list[j],
                            out_channels=in_channels_list[i],
                            scale_factor=2 ** (j - i),
                            data_format=data_format,
                            name=fuse_layer_name + "/block{}".format(j + 1)))
                    elif j == i:
                        fuse_layer.add(Identity(name=fuse_layer_name + "/block{}".format(j + 1)))
                    else:
                        conv3x3_seq_name = fuse_layer_name + "/block{}_conv3x3_seq".format(j + 1)
                        conv3x3_seq = SimpleSequential(name=conv3x3_seq_name)
                        for k in range(i - j):
                            if k == i - j - 1:
                                conv3x3_seq.add(conv3x3_block(
                                    in_channels=in_channels_list[j],
                                    out_channels=in_channels_list[i],
                                    strides=2,
                                    activation=None,
                                    data_format=data_format,
                                    name="subblock{}".format(k + 1)))
                            else:
                                conv3x3_seq.add(conv3x3_block(
                                    in_channels=in_channels_list[j],
                                    out_channels=in_channels_list[j],
                                    strides=2,
                                    data_format=data_format,
                                    name="subblock{}".format(k + 1)))
                        fuse_layer.add(conv3x3_seq)
                self.fuse_layers.add(fuse_layer)
            self.activ = nn.ReLU()

    def call(self, x, training=None):
        for i in range(self.num_branches):
            x[i] = self.branches[i](x[i], training=training)

        if self.num_branches == 1:
            return x

        x_fuse = []
        for i in range(len(self.fuse_layers)):
            y = x[0] if i == 0 else self.fuse_layers[i][0](x[0], training=training)
            for j in range(1, self.num_branches):
                if i == j:
                    y = y + x[j]
                else:
                    y = y + self.fuse_layers[i][j](x[j], training=training)
            x_fuse.append(self.activ(y))

        return x_fuse
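
For each output branch i, the fuse layer upsamples higher-index (lower-resolution) branches and applies strided convolutions to lower-index ones; a self-contained sketch of the decision table for num_branches=3:

num_branches = 3
for i in range(num_branches):
    for j in range(num_branches):
        if j > i:
            op = "upsample x{}".format(2 ** (j - i))
        elif j == i:
            op = "identity"
        else:
            op = "{} strided conv3x3".format(i - j)
        print("fuse[{}][{}]: {}".format(i, j, op))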
Example #14
class HRNet(tf.keras.Model):
    """
    HRNet model from 'Deep High-Resolution Representation Learning for Visual Recognition,'
    https://arxiv.org/abs/1908.07919.

    Parameters:
    ----------
    channels : list of list of int
        Number of output channels for each unit.
    init_block_channels : int
        Number of output channels for the initial unit.
    init_num_subblocks : int
        Number of subblocks in the initial unit.
    num_modules : list of int
        Number of modules per stage.
    num_subblocks : list of list of int
        Number of subblocks per stage.
    in_channels : int, default 3
        Number of input channels.
    in_size : tuple of two ints, default (224, 224)
        Spatial size of the expected input image.
    classes : int, default 1000
        Number of classification classes.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    def __init__(self,
                 channels,
                 init_block_channels,
                 init_num_subblocks,
                 num_modules,
                 num_subblocks,
                 in_channels=3,
                 in_size=(224, 224),
                 classes=1000,
                 data_format="channels_last",
                 **kwargs):
        super(HRNet, self).__init__(**kwargs)
        self.in_size = in_size
        self.classes = classes
        self.data_format = data_format
        self.branches = [2, 3, 4]

        self.features = SimpleSequential(name="features")
        self.features.add(HRInitBlock(
            in_channels=in_channels,
            out_channels=init_block_channels,
            mid_channels=64,
            num_subblocks=init_num_subblocks,
            data_format=data_format,
            name="init_block"))
        in_channels_list = [init_block_channels]
        for i in range(len(self.branches)):
            self.features.add(HRStage(
                in_channels_list=in_channels_list,
                out_channels_list=channels[i],
                num_modules=num_modules[i],
                num_branches=self.branches[i],
                num_subblocks=num_subblocks[i],
                data_format=data_format,
                name="stage{}".format(i + 1)))
            in_channels_list = self.features[-1].in_channels_list
        self.features.add(HRFinalBlock(
            in_channels_list=in_channels_list,
            out_channels_list=[128, 256, 512, 1024],
            data_format=data_format,
            name="final_block"))
        self.features.add(nn.AveragePooling2D(
            pool_size=7,
            strides=1,
            data_format=data_format,
            name="final_pool"))

        self.output1 = nn.Dense(
            units=classes,
            input_dim=2048,
            name="output1")

    def call(self, x, training=None):
        x = self.features(x, training=training)
        x = flatten(x, self.data_format)
        x = self.output1(x)
        return x
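
A hedged usage sketch; the configuration values below are illustrative placeholders, not an official hrnet_w* setting (in the source collection the concrete configs are normally supplied by a factory function):

net = HRNet(channels=[[16, 32], [16, 32, 64], [16, 32, 64, 128]],
            init_block_channels=128,
            init_num_subblocks=1,
            num_modules=[1, 1, 1],
            num_subblocks=[[1, 1], [1, 1, 1], [1, 1, 1, 1]],
            data_format="channels_last")
x = tf.random.normal((1, 224, 224, 3))
y = net(x, training=False)   # -> (1, 1000)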
Example #15
class HRStage(nn.Layer):
    """
    HRNet stage block.

    Parameters:
    ----------
    in_channels_list : list of int
        Number of input channels (output channels from the previous stage).
    out_channels_list : list of int
        Number of output channels in the current stage.
    num_modules : int
        Number of modules.
    num_branches : int
        Number of branches.
    num_subblocks : list of int
        Number of subblocks.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    def __init__(self,
                 in_channels_list,
                 out_channels_list,
                 num_modules,
                 num_branches,
                 num_subblocks,
                 data_format="channels_last",
                 **kwargs):
        super(HRStage, self).__init__(**kwargs)
        self.branches = num_branches
        self.in_channels_list = out_channels_list
        in_branches = len(in_channels_list)
        out_branches = len(out_channels_list)

        self.transition = SimpleSequential(name="transition")
        for i in range(out_branches):
            if i < in_branches:
                if out_channels_list[i] != in_channels_list[i]:
                    self.transition.add(conv3x3_block(
                        in_channels=in_channels_list[i],
                        out_channels=out_channels_list[i],
                        strides=1,
                        data_format=data_format,
                        name="transition/block{}".format(i + 1)))
                else:
                    self.transition.add(Identity(name="transition/block{}".format(i + 1)))
            else:
                conv3x3_seq = SimpleSequential(name="transition/conv3x3_seq{}".format(i + 1))
                for j in range(i + 1 - in_branches):
                    in_channels_i = in_channels_list[-1]
                    out_channels_i = out_channels_list[i] if j == i - in_branches else in_channels_i
                    conv3x3_seq.add(conv3x3_block(
                        in_channels=in_channels_i,
                        out_channels=out_channels_i,
                        strides=2,
                        data_format=data_format,
                        name="subblock{}".format(j + 1)))
                self.transition.add(conv3x3_seq)

        self.layers = SimpleSequential(name="layers")
        for i in range(num_modules):
            self.layers.add(HRBlock(
                in_channels_list=self.in_channels_list,
                out_channels_list=out_channels_list,
                num_branches=num_branches,
                num_subblocks=num_subblocks,
                data_format=data_format,
                name="block{}".format(i + 1)))
            self.in_channels_list = list(self.layers[-1].in_channels_list)

    def call(self, x, training=None):
        x_list = []
        for j in range(self.branches):
            if not isinstance(self.transition[j], Identity):
                x_list.append(self.transition[j](x[-1] if isinstance(x, (list, tuple)) else x, training=training))
            else:
                x_list_j = x[j] if isinstance(x, (list, tuple)) else x
                x_list.append(x_list_j)
        y_list = self.layers(x_list, training=training)
        return y_list
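
The transition logic admits three cases per output branch; a self-contained trace for in_branches=2, out_branches=3 with hypothetical channel lists:

in_channels_list = [32, 64]
out_channels_list = [32, 64, 128]
in_branches, out_branches = len(in_channels_list), len(out_channels_list)
for i in range(out_branches):
    if i < in_branches:
        if out_channels_list[i] != in_channels_list[i]:
            print("branch {}: 1-to-1 conv3x3".format(i + 1))
        else:
            print("branch {}: identity".format(i + 1))
    else:
        print("branch {}: {} strided conv3x3 from last input branch".format(
            i + 1, i + 1 - in_branches))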
Example #16
class IbpBackbone(nn.Layer):
    """
    IBPPose backbone.

    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    activation : function or str or None
        Activation function or name of activation function.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    def __init__(self,
                 in_channels,
                 out_channels,
                 activation,
                 data_format="channels_last",
                 **kwargs):
        super(IbpBackbone, self).__init__(**kwargs)
        self.data_format = data_format
        dilations = (3, 3, 4, 4, 5, 5)
        mid1_channels = out_channels // 4
        mid2_channels = out_channels // 2

        self.conv1 = conv7x7_block(in_channels=in_channels,
                                   out_channels=mid1_channels,
                                   strides=2,
                                   activation=activation,
                                   data_format=data_format,
                                   name="conv1")
        self.res1 = IbpResUnit(in_channels=mid1_channels,
                               out_channels=mid2_channels,
                               activation=activation,
                               data_format=data_format,
                               name="res1")
        self.pool = MaxPool2d(pool_size=2,
                              strides=2,
                              data_format=data_format,
                              name="pool")
        self.res2 = IbpResUnit(in_channels=mid2_channels,
                               out_channels=mid2_channels,
                               activation=activation,
                               data_format=data_format,
                               name="res2")
        self.dilation_branch = SimpleSequential(name="dilation_branch")
        for i, dilation in enumerate(dilations):
            self.dilation_branch.add(
                conv3x3_block(in_channels=mid2_channels,
                              out_channels=mid2_channels,
                              padding=dilation,
                              dilation=dilation,
                              activation=activation,
                              data_format=data_format,
                              name="block{}".format(i + 1)))

    def call(self, x, training=None):
        x = self.conv1(x, training=training)
        x = self.res1(x, training=training)
        x = self.pool(x, training=training)
        x = self.res2(x, training=training)
        y = self.dilation_branch(x, training=training)
        x = tf.concat([x, y], axis=get_channel_axis(self.data_format))
        return x
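
Since the dilation branch preserves mid2_channels and call concatenates it with its input, the backbone ends with mid2 + mid2 = out_channels channels; a quick check:

out_channels = 256                      # illustrative
mid2_channels = out_channels // 2       # 128
assert mid2_channels + mid2_channels == out_channels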
Example #17
class HarDUnit(nn.Layer):
    """
    HarDNet unit.

    Parameters:
    ----------
    in_channels_list : list of int
        Number of input channels for each block.
    out_channels_list : list of int
        Number of output channels for each block.
    links_list : list of list of int
        List of indices for each layer.
    use_deptwise : bool
        Whether to use depthwise downsampling.
    use_dropout : bool
        Whether to use dropout module.
    downsampling : bool
        Whether to downsample input.
    activation : str
        Name of activation function.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    def __init__(self,
                 in_channels_list,
                 out_channels_list,
                 links_list,
                 use_deptwise,
                 use_dropout,
                 downsampling,
                 activation,
                 data_format="channels_last",
                 **kwargs):
        super(HarDUnit, self).__init__(**kwargs)
        self.data_format = data_format
        self.links_list = links_list
        self.use_dropout = use_dropout
        self.downsampling = downsampling

        self.blocks = SimpleSequential(name="blocks")
        for i in range(len(links_list)):
            in_channels = in_channels_list[i]
            out_channels = out_channels_list[i]
            if use_deptwise:
                unit = invdwsconv3x3_block(in_channels=in_channels,
                                           out_channels=out_channels,
                                           pw_activation=activation,
                                           dw_activation=None,
                                           data_format=data_format,
                                           name="block{}".format(i + 1))
            else:
                unit = conv3x3_block(in_channels=in_channels,
                                     out_channels=out_channels,
                                     data_format=data_format,
                                     name="block{}".format(i + 1))
            self.blocks.add(unit)

        if self.use_dropout:
            self.dropout = nn.Dropout(rate=0.1, name="dropout")
        self.conv = conv1x1_block(in_channels=in_channels_list[-1],
                                  out_channels=out_channels_list[-1],
                                  activation=activation,
                                  data_format=data_format,
                                  name="conv")

        if self.downsampling:
            if use_deptwise:
                self.downsample = dwconv3x3_block(
                    in_channels=out_channels_list[-1],
                    out_channels=out_channels_list[-1],
                    strides=2,
                    activation=None,
                    data_format=data_format,
                    name="downsample")
            else:
                self.downsample = MaxPool2d(pool_size=2,
                                            strides=2,
                                            data_format=data_format,
                                            name="downsample")

    def call(self, x, training=None):
        axis = get_channel_axis(self.data_format)
        layer_outs = [x]
        for links_i, layer_i in zip(self.links_list, self.blocks.children):
            layer_in = []
            for idx_ij in links_i:
                layer_in.append(layer_outs[idx_ij])
            if len(layer_in) > 1:
                x = tf.concat(layer_in, axis=axis)
            else:
                x = layer_in[0]
            out = layer_i(x, training=training)
            layer_outs.append(out)

        outs = []
        for i, layer_out_i in enumerate(layer_outs):
            if (i == len(layer_outs) - 1) or (i % 2 == 1):
                outs.append(layer_out_i)
        x = tf.concat(outs, axis=axis)

        if self.use_dropout:
            x = self.dropout(x, training=training)
        x = self.conv(x, training=training)

        if self.downsampling:
            x = self.downsample(x, training=training)
        return x
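
The concat near the end of call keeps the final layer's output plus every odd-indexed intermediate; a self-contained sketch of the selection rule:

num_outs = 9   # len(layer_outs): the input plus eight block outputs (illustrative)
kept = [i for i in range(num_outs) if (i == num_outs - 1) or (i % 2 == 1)]
print(kept)    # [1, 3, 5, 7, 8]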
Example #18
class MobileNet(tf.keras.Model):
    """
    MobileNet model from 'MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications,'
    https://arxiv.org/abs/1704.04861.

    Parameters:
    ----------
    channels : list of list of int
        Number of output channels for each unit.
    first_stage_stride : bool
        Whether stride is used at the first stage.
    dw_use_bn : bool, default True
        Whether to use BatchNorm layer (depthwise convolution block).
    dw_activation : function or str or None, default 'relu'
        Activation function after the depthwise convolution block.
    in_channels : int, default 3
        Number of input channels.
    in_size : tuple of two ints, default (224, 224)
        Spatial size of the expected input image.
    classes : int, default 1000
        Number of classification classes.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    def __init__(self,
                 channels,
                 first_stage_stride,
                 dw_use_bn=True,
                 dw_activation="relu",
                 in_channels=3,
                 in_size=(224, 224),
                 classes=1000,
                 data_format="channels_last",
                 **kwargs):
        super(MobileNet, self).__init__(**kwargs)
        self.in_size = in_size
        self.classes = classes
        self.data_format = data_format

        self.features = SimpleSequential(name="features")
        init_block_channels = channels[0][0]
        self.features.add(
            conv3x3_block(in_channels=in_channels,
                          out_channels=init_block_channels,
                          strides=2,
                          data_format=data_format,
                          name="init_block"))
        in_channels = init_block_channels
        for i, channels_per_stage in enumerate(channels[1:]):
            stage = SimpleSequential(name="stage{}".format(i + 1))
            for j, out_channels in enumerate(channels_per_stage):
                strides = 2 if (j == 0) and (
                    (i != 0) or first_stage_stride) else 1
                stage.add(
                    dwsconv3x3_block(in_channels=in_channels,
                                     out_channels=out_channels,
                                     strides=strides,
                                     dw_use_bn=dw_use_bn,
                                     dw_activation=dw_activation,
                                     data_format=data_format,
                                     name="unit{}".format(j + 1)))
                in_channels = out_channels
            self.features.add(stage)
        self.features.add(
            nn.AveragePooling2D(pool_size=7,
                                strides=1,
                                data_format=data_format,
                                name="final_pool"))

        self.output1 = nn.Dense(units=classes,
                                input_dim=in_channels,
                                name="output1")

    def call(self, x, training=None):
        x = self.features(x, training=training)
        x = flatten(x, self.data_format)
        x = self.output1(x)
        return x
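
A hedged end-to-end sketch; the channel lists below follow the classic MobileNet-1.0 layout as an assumption, and any pretrained factories live elsewhere in the collection:

channels = [[32], [64], [128, 128], [256, 256],
            [512, 512, 512, 512, 512, 512], [1024, 1024]]
net = MobileNet(channels=channels, first_stage_stride=False)
x = tf.random.normal((1, 224, 224, 3))
y = net(x, training=False)   # -> (1, 1000)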