class LwopDecoderFeaturesBend(nn.Layer):
    """
    Lightweight OpenPose 2D/3D specific decoder 3D features bend.

    Runs two refinement blocks and then a decoder bend on the result.

    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    mid_channels : int
        Number of middle channels.
    out_channels : int
        Number of output channels.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    def __init__(self,
                 in_channels,
                 mid_channels,
                 out_channels,
                 data_format="channels_last",
                 **kwargs):
        super(LwopDecoderFeaturesBend, self).__init__(**kwargs)
        # Only the first refinement block consumes `in_channels`; the second
        # one already works on `mid_channels`.
        refinement_inputs = (in_channels, mid_channels)

        self.body = SimpleSequential(name="body")
        for block_number, block_in_channels in enumerate(refinement_inputs, start=1):
            refinement = LwopRefinementBlock(
                in_channels=block_in_channels,
                out_channels=mid_channels,
                data_format=data_format,
                name="block{}".format(block_number))
            self.body.add(refinement)

        self.features_bend = LwopDecoderBend(
            in_channels=mid_channels,
            mid_channels=mid_channels,
            out_channels=out_channels,
            data_format=data_format,
            name="features_bend")

    def call(self, x, training=None):
        refined = self.body(x, training=training)
        return self.features_bend(refined, training=training)
def __init__(self,
             in_channels_list,
             out_channels_list,
             num_modules,
             num_branches,
             num_subblocks,
             data_format="channels_last",
             **kwargs):
    """
    Build the transition layers and the sequence of HRBlock modules.

    Parameters:
    ----------
    in_channels_list : list of int
        Number of output channels from the previous layer.
    out_channels_list : list of int
        Number of output channels in the current layer.
    num_modules : int
        Number of modules.
    num_branches : int
        Number of branches.
    num_subblocks : list of int
        Number of subblocks.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    super(HRStage, self).__init__(**kwargs)
    self.branches = num_branches
    self.in_channels_list = out_channels_list
    prev_branch_count = len(in_channels_list)

    self.transition = SimpleSequential(name="transition")
    for branch_idx, branch_out_channels in enumerate(out_channels_list):
        if branch_idx >= prev_branch_count:
            # Branch that did not exist before: derive it from the last
            # previous branch through a chain of stride-2 convolutions.
            downscale_seq = SimpleSequential(
                name="transition/conv3x3_seq{}".format(branch_idx + 1))
            step_count = branch_idx + 1 - prev_branch_count
            for step in range(step_count):
                step_in_channels = in_channels_list[-1]
                # Only the last step switches to the branch's channel count.
                if step == step_count - 1:
                    step_out_channels = branch_out_channels
                else:
                    step_out_channels = step_in_channels
                downscale_seq.add(conv3x3_block(
                    in_channels=step_in_channels,
                    out_channels=step_out_channels,
                    strides=2,
                    data_format=data_format,
                    name="subblock{}".format(step + 1)))
            self.transition.add(downscale_seq)
        elif branch_out_channels != in_channels_list[branch_idx]:
            # Existing branch whose width changes.
            self.transition.add(conv3x3_block(
                in_channels=in_channels_list[branch_idx],
                out_channels=branch_out_channels,
                strides=1,
                data_format=data_format,
                name="transition/block{}".format(branch_idx + 1)))
        else:
            # Existing branch with unchanged width is passed through.
            self.transition.add(Identity(name="transition/block{}".format(branch_idx + 1)))

    self.layers = SimpleSequential(name="layers")
    for module_idx in range(num_modules):
        module = HRBlock(
            in_channels_list=self.in_channels_list,
            out_channels_list=out_channels_list,
            num_branches=num_branches,
            num_subblocks=num_subblocks,
            data_format=data_format,
            name="block{}".format(module_idx + 1))
        self.layers.add(module)
        # Track the channel counts produced by the most recent module.
        self.in_channels_list = list(self.layers[-1].in_channels_list)
def __init__(self,
             in_channels,
             out_channels,
             strides,
             reps,
             start_with_relu=True,
             grow_first=True,
             data_format="channels_last",
             **kwargs):
    """
    Build the optional identity projection and the body of the unit.

    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    strides : int or tuple/list of 2 int
        Strides of the identity convolution and of the trailing pooling.
    reps : int
        Number of depthwise separable 3x3 blocks in the body.
    start_with_relu : bool, default True
        Value of the `activate` flag for the first block of the body.
    grow_first : bool, default True
        Whether the channel change happens in the first block (otherwise in
        the last one).
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    super(XceptionUnit, self).__init__(**kwargs)
    self.resize_identity = (in_channels != out_channels) or (strides != 1)
    if self.resize_identity:
        self.identity_conv = conv1x1_block(
            in_channels=in_channels,
            out_channels=out_channels,
            strides=strides,
            activation=None,
            data_format=data_format,
            name="identity_conv")

    # Exactly one block changes the channel count; every other block keeps
    # the width on its side of that block.
    growing_idx = 0 if grow_first else reps - 1
    steady_channels = out_channels if grow_first else in_channels

    self.body = SimpleSequential(name="body")
    for rep_idx in range(reps):
        if rep_idx == growing_idx:
            block_in_channels, block_out_channels = in_channels, out_channels
        else:
            block_in_channels = block_out_channels = steady_channels
        # Blocks are appended to `children` directly (not via `add`).
        self.body.children.append(dws_conv3x3_block(
            in_channels=block_in_channels,
            out_channels=block_out_channels,
            activate=(start_with_relu if rep_idx == 0 else True),
            data_format=data_format,
            name="block{}".format(rep_idx + 1)))

    if strides != 1:
        self.body.children.append(MaxPool2d(
            pool_size=3,
            strides=strides,
            padding=1,
            data_format=data_format,
            name="pool"))
def __init__(self,
             channels,
             first_stage_stride,
             dw_use_bn=True,
             dw_activation="relu",
             in_channels=3,
             in_size=(224, 224),
             classes=1000,
             data_format="channels_last",
             **kwargs):
    """
    Build the feature extractor and the classifier.

    Parameters:
    ----------
    channels : list of list of int
        Number of output channels for each unit; `channels[0][0]` is the
        width of the initial block.
    first_stage_stride : bool
        Whether stride is used at the first stage.
    dw_use_bn : bool, default True
        Whether to use BatchNorm layer (depthwise convolution block).
    dw_activation : function or str or None, default 'relu'
        Activation function after the depthwise convolution block.
    in_channels : int, default 3
        Number of input channels.
    in_size : tuple of two ints, default (224, 224)
        Spatial size of the expected input image.
    classes : int, default 1000
        Number of classification classes.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    super(MobileNet, self).__init__(**kwargs)
    self.in_size = in_size
    self.classes = classes
    self.data_format = data_format

    stem_channels = channels[0][0]
    self.features = SimpleSequential(name="features")
    self.features.add(conv3x3_block(
        in_channels=in_channels,
        out_channels=stem_channels,
        strides=2,
        data_format=data_format,
        name="init_block"))

    current_channels = stem_channels
    for stage_number, stage_channels in enumerate(channels[1:], start=1):
        stage = SimpleSequential(name="stage{}".format(stage_number))
        for unit_number, unit_channels in enumerate(stage_channels, start=1):
            # The first unit of a stage downsamples; for the very first stage
            # this depends on `first_stage_stride`.
            if (unit_number == 1) and ((stage_number != 1) or first_stage_stride):
                strides = 2
            else:
                strides = 1
            stage.add(dwsconv3x3_block(
                in_channels=current_channels,
                out_channels=unit_channels,
                strides=strides,
                dw_use_bn=dw_use_bn,
                dw_activation=dw_activation,
                data_format=data_format,
                name="unit{}".format(unit_number)))
            current_channels = unit_channels
        self.features.add(stage)

    self.features.add(nn.AveragePooling2D(
        pool_size=7,
        strides=1,
        data_format=data_format,
        name="final_pool"))

    self.output1 = nn.Dense(
        units=classes,
        input_dim=current_channels,
        name="output1")
def __init__(self,
             in_channels,
             out_channels,
             mid_channels_list,
             kernel_size_list,
             strides_list,
             padding_list,
             data_format="channels_last",
             **kwargs):
    """
    Build a chain of convolutions plus separate 1x3 and 3x1 convolutions.

    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels of the 1x3 and of the 3x1 convolution.
    mid_channels_list : list/tuple of int
        Number of output channels for each convolution of the chain.
    kernel_size_list : list/tuple
        Kernel size for each convolution of the chain.
    strides_list : list/tuple
        Strides for each convolution of the chain.
    padding_list : list/tuple
        Padding for each convolution of the chain.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    super(ConvSeq3x3Branch, self).__init__(**kwargs)
    self.data_format = data_format

    conv_specs = zip(mid_channels_list, kernel_size_list, strides_list, padding_list)
    chain_channels = in_channels
    self.conv_list = SimpleSequential(name="conv_list")
    for conv_number, (mid_channels, kernel_size, strides, padding) in enumerate(conv_specs, start=1):
        conv = InceptConv(
            in_channels=chain_channels,
            out_channels=mid_channels,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            name="conv{}".format(conv_number))
        # Appended to `children` directly (not via `add`).
        self.conv_list.children.append(conv)
        chain_channels = mid_channels

    # Both tail convolutions take the output width of the chain.
    self.conv1x3 = InceptConv(
        in_channels=chain_channels,
        out_channels=out_channels,
        kernel_size=(1, 3),
        strides=1,
        padding=(0, 1),
        data_format=data_format,
        name="conv1x3")
    self.conv3x1 = InceptConv(
        in_channels=chain_channels,
        out_channels=out_channels,
        kernel_size=(3, 1),
        strides=1,
        padding=(1, 0),
        data_format=data_format,
        name="conv3x1")
class SBStage(nn.Layer):
    """
    SB stage.

    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    down_channels : int
        Number of output channels for a downscale block.
    channels_list : list of int
        Number of output channels for all residual block.
    kernel_sizes_list : list of int
        Convolution window size for branches.
    scale_factors_list : list of int
        Scale factor for branches.
    use_residual_list : list of int
        List of flags for using residual in each ESP-block (a value of 1 enables it).
    se_reduction : int
        Squeeze reduction value (0 means no-se).
    in_size : tuple of 2 int
        Spatial size of the output tensor for the bilinear upsampling operation.
    bn_eps : float
        Small float added to variance in Batch norm.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    def __init__(self,
                 in_channels,
                 down_channels,
                 channels_list,
                 kernel_sizes_list,
                 scale_factors_list,
                 use_residual_list,
                 se_reduction,
                 in_size,
                 bn_eps,
                 data_format="channels_last",
                 **kwargs):
        super(SBStage, self).__init__(**kwargs)
        self.data_format = data_format

        self.down_conv = dwsconv3x3_block(
            in_channels=in_channels,
            out_channels=down_channels,
            strides=2,
            dw_use_bn=False,
            bn_eps=bn_eps,
            dw_activation=None,
            pw_activation=(lambda: PReLU2(
                down_channels,
                data_format=data_format,
                name="activ")),
            se_reduction=se_reduction,
            data_format=data_format,
            name="down_conv")
        in_channels = down_channels

        self.main_branch = SimpleSequential(name="main_branch")
        for i, out_channels in enumerate(channels_list):
            use_residual = (use_residual_list[i] == 1)
            kernel_sizes = kernel_sizes_list[i]
            scale_factors = scale_factors_list[i]
            self.main_branch.add(ESPBlock(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_sizes=kernel_sizes,
                scale_factors=scale_factors,
                use_residual=use_residual,
                # The blocks run after the stride-2 `down_conv`, hence the halved size.
                in_size=((in_size[0] // 2, in_size[1] // 2) if in_size else None),
                bn_eps=bn_eps,
                data_format=data_format,
                name="block{}".format(i + 1)))
            in_channels = out_channels

        # The pre-activation sees the concatenation of the downscaled input
        # and the main branch output.
        self.preactiv = PreActivation(
            in_channels=(down_channels + in_channels),
            bn_eps=bn_eps,
            data_format=data_format,
            name="preactiv")

    def call(self, x, training=None):
        """
        Returns a pair: the pre-activated concatenation of the downscaled
        input with the main branch output, and the main branch output itself.
        """
        # Forward the caller's `training` flag; previously a literal None was
        # passed to every sub-layer, so the flag given to this stage was ignored.
        x = self.down_conv(x, training=training)
        y = self.main_branch(x, training=training)
        x = tf.concat([x, y], axis=get_channel_axis(self.data_format))
        x = self.preactiv(x, training=training)
        return x, y
def __init__(self,
             encoder_channels,
             encoder_paddings,
             encoder_init_block_channels,
             encoder_final_block_channels,
             refinement_units,
             calc_3d_features,
             return_heatmap=True,
             in_channels=3,
             in_size=(368, 368),
             keypoints=19,
             data_format="channels_last",
             **kwargs):
    """
    Build the encoder (backbone + final block) and the decoder.

    Parameters:
    ----------
    encoder_channels : list of list of int
        Number of output channels for each encoder unit.
    encoder_paddings : list of list of int
        Value used both as padding and as dilation for each encoder unit.
    encoder_init_block_channels : int
        Number of output channels for the encoder initial block.
    encoder_final_block_channels : int
        Number of output channels for the encoder final block.
    refinement_units : int
        Number of refinement units in the decoder.
    calc_3d_features : bool
        Flag stored on the model and forwarded to the decoder final block.
    return_heatmap : bool, default True
        Flag stored on the model.
    in_channels : int, default 3
        Number of input channels (must be 3).
    in_size : tuple of two ints, default (368, 368)
        Spatial size of the expected input image.
    keypoints : int, default 19
        Number of keypoints.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    super(LwOpenPose, self).__init__(**kwargs)
    assert (in_channels == 3)
    self.in_size = in_size
    self.keypoints = keypoints
    self.data_format = data_format
    self.return_heatmap = return_heatmap
    self.calc_3d_features = calc_3d_features

    # Encoder backbone: initial convolution followed by the stages.
    backbone = SimpleSequential(name="backbone")
    backbone.add(conv3x3_block(
        in_channels=in_channels,
        out_channels=encoder_init_block_channels,
        strides=2,
        data_format=data_format,
        name="init_block"))
    backbone_channels = encoder_init_block_channels
    for stage_idx, stage_channels in enumerate(encoder_channels):
        stage = SimpleSequential(name="stage{}".format(stage_idx + 1))
        stage_paddings = encoder_paddings[stage_idx]
        for unit_idx, unit_channels in enumerate(stage_channels):
            # Every stage except the first starts with a stride-2 unit.
            downsample = (unit_idx == 0) and (stage_idx != 0)
            unit_padding = stage_paddings[unit_idx]
            stage.add(dwsconv3x3_block(
                in_channels=backbone_channels,
                out_channels=unit_channels,
                strides=(2 if downsample else 1),
                padding=unit_padding,
                dilation=unit_padding,
                data_format=data_format,
                name="unit{}".format(unit_idx + 1)))
            backbone_channels = unit_channels
        backbone.add(stage)

    self.encoder = tf.keras.Sequential(name="encoder")
    self.encoder.add(backbone)
    self.encoder.add(LwopEncoderFinalBlock(
        in_channels=backbone_channels,
        out_channels=encoder_final_block_channels,
        data_format=data_format,
        name="final_block"))

    self.decoder = tf.keras.Sequential(name="decoder")
    self.decoder.add(LwopDecoderInitBlock(
        in_channels=encoder_final_block_channels,
        keypoints=keypoints,
        data_format=data_format,
        name="init_block"))

    # After the init block the decoder works on the encoder features extended
    # by 3 * keypoints channels.
    heatmap_paf_channels = 3 * keypoints
    decoder_channels = encoder_final_block_channels + heatmap_paf_channels
    for unit_number in range(1, refinement_units + 1):
        self.decoder.add(LwopDecoderUnit(
            in_channels=decoder_channels,
            keypoints=keypoints,
            data_format=data_format,
            name="unit{}".format(unit_number)))
    self.decoder.add(LwopDecoderFinalBlock(
        in_channels=decoder_channels,
        keypoints=keypoints,
        bottleneck_factor=2,
        calc_3d_features=calc_3d_features,
        data_format=data_format,
        name="final_block"))
class IbpPose(tf.keras.Model):
    """
    IBPPose model from 'Simple Pose: Rethinking and Improving a Bottom-up Approach for Multi-Person Pose Estimation,'
    https://arxiv.org/abs/1911.10529.

    Parameters:
    ----------
    passes : int
        Number of passes.
    backbone_out_channels : int
        Number of output channels for the backbone.
    outs_channels : int
        Number of channels forwarded to every pass as its `mid_channels`.
    depth : int
        Depth of hourglass.
    growth_rate : int
        Addition for number of channel for each level.
    use_bn : bool
        Whether to use BatchNorm layer.
    in_channels : int, default 3
        Number of input channels.
    in_size : tuple of two ints, default (256, 256)
        Spatial size of the expected input image.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    def __init__(self,
                 passes,
                 backbone_out_channels,
                 outs_channels,
                 depth,
                 growth_rate,
                 use_bn,
                 in_channels=3,
                 in_size=(256, 256),
                 data_format="channels_last",
                 **kwargs):
        super(IbpPose, self).__init__(**kwargs)
        self.in_size = in_size
        self.data_format = data_format
        # One activation layer instance is handed to the backbone and to all passes.
        activation = nn.LeakyReLU(alpha=0.01)

        self.backbone = IbpBackbone(
            in_channels=in_channels,
            out_channels=backbone_out_channels,
            activation=activation,
            data_format=data_format,
            name="backbone")

        last_pass_idx = passes - 1
        self.decoder = SimpleSequential(name="decoder")
        for pass_idx in range(passes):
            decoder_pass = IbpPass(
                channels=backbone_out_channels,
                mid_channels=outs_channels,
                depth=depth,
                growth_rate=growth_rate,
                # Every pass except the last one is built with merging enabled.
                merge=(pass_idx != last_pass_idx),
                use_bn=use_bn,
                activation=activation,
                data_format=data_format,
                name="pass{}".format(pass_idx + 1))
            self.decoder.add(decoder_pass)

    def call(self, x, training=None):
        features = self.backbone(x, training=training)
        pass_output = None
        for decoder_pass in self.decoder.children:
            # From the second pass on, the previous pass output is added to
            # the running features before they are fed to the next pass.
            if pass_output is not None:
                features = features + pass_output
            pass_output = decoder_pass(features, pass_output, training=training)
        return pass_output
def __init__(self,
             channels,
             mid_channels,
             depth,
             growth_rate,
             merge,
             use_bn,
             activation,
             data_format="channels_last",
             **kwargs):
    """
    Build the hourglass, the pre/post blocks and the optional merge blocks.

    Parameters:
    ----------
    channels : int
        Number of input/output channels.
    mid_channels : int
        Number of output channels of the post block.
    depth : int
        Depth of hourglass.
    growth_rate : int
        Addition for number of channel for each level.
    merge : bool
        Whether to create the two merge blocks.
    use_bn : bool
        Whether to use BatchNorm layer.
    activation : function or str or None
        Activation function or name of activation function.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    super(IbpPass, self).__init__(**kwargs)
    self.merge = merge

    skip_seq = SimpleSequential(name="skip_seq")
    down_seq = SimpleSequential(name="down_seq")
    up_seq = SimpleSequential(name="up_seq")

    # `level_channels` is the width at the current hourglass level and
    # `next_channels` the width one level deeper.
    level_channels = channels
    next_channels = channels
    for level in range(depth + 1):
        level_number = level + 1
        skip_seq.add(IbpResUnit(
            in_channels=level_channels,
            out_channels=level_channels,
            activation=activation,
            data_format=data_format,
            name="skip{}".format(level_number)))
        next_channels += growth_rate
        # The deepest level has a skip unit only.
        if level < depth:
            down_seq.add(IbpDownBlock(
                in_channels=level_channels,
                out_channels=next_channels,
                activation=activation,
                data_format=data_format,
                name="down{}".format(level_number)))
            up_seq.add(IbpUpBlock(
                in_channels=next_channels,
                out_channels=level_channels,
                use_bn=use_bn,
                activation=activation,
                data_format=data_format,
                name="up{}".format(level_number)))
        level_channels = next_channels

    self.hg = Hourglass(
        down_seq=down_seq,
        up_seq=up_seq,
        skip_seq=skip_seq,
        name="hg")

    self.pre_block = IbpPreBlock(
        out_channels=channels,
        use_bn=use_bn,
        activation=activation,
        data_format=data_format,
        name="pre_block")
    self.post_block = conv1x1_block(
        in_channels=channels,
        out_channels=mid_channels,
        use_bias=True,
        use_bn=False,
        activation=None,
        data_format=data_format,
        name="post_block")

    if self.merge:
        self.pre_merge_block = MergeBlock(
            in_channels=channels,
            out_channels=channels,
            use_bn=use_bn,
            data_format=data_format,
            name="pre_merge_block")
        self.post_merge_block = MergeBlock(
            in_channels=mid_channels,
            out_channels=channels,
            use_bn=use_bn,
            data_format=data_format,
            name="post_merge_block")
def __init__(self,
             audio_features,
             audio_window_size,
             seq_len,
             encoder_features,
             data_format="channels_last",
             **kwargs):
    """
    Build the convolutional, fully-connected and attention branches.

    Parameters:
    ----------
    audio_features : int
        Number of audio features (characters/sounds).
    audio_window_size : int
        Size of audio window (for time related audio features).
    seq_len : int
        Size of feature window.
    encoder_features : int
        Number of encoder features.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    super(NvpAttExpEncoder, self).__init__(**kwargs)
    self.audio_features = audio_features
    self.audio_window_size = audio_window_size
    self.seq_len = seq_len
    self.data_format = data_format

    # (output channels, LeakyReLU slope) per layer; a slope of None selects "tanh".
    conv_specs = ((32, 0.02), (32, 0.02), (64, 0.2), (64, 0.2))
    fc_specs = ((128, 0.02), (64, 0.02), (encoder_features, None))
    att_conv_channels = (16, 8, 4, 2, 1)
    att_conv_slope = 0.02

    # `width` is threaded through all three branches in construction order.
    width = audio_features
    self.conv_branch = SimpleSequential(name="conv_branch")
    for layer_number, (layer_channels, slope) in enumerate(conv_specs, start=1):
        self.conv_branch.add(ConvBlock(
            in_channels=width,
            out_channels=layer_channels,
            kernel_size=(3, 1),
            strides=(2, 1),
            padding=(1, 0),
            use_bias=True,
            use_bn=False,
            activation=nn.LeakyReLU(alpha=slope),
            data_format=data_format,
            name="conv{}".format(layer_number)))
        width = layer_channels

    self.fc_branch = SimpleSequential(name="fc_branch")
    for layer_number, (layer_channels, slope) in enumerate(fc_specs, start=1):
        if slope is not None:
            fc_activation = nn.LeakyReLU(alpha=slope)
        else:
            fc_activation = "tanh"
        self.fc_branch.add(DenseBlock(
            in_channels=width,
            out_channels=layer_channels,
            use_bias=True,
            use_bn=False,
            activation=fc_activation,
            data_format=data_format,
            name="fc{}".format(layer_number)))
        width = layer_channels

    self.att_conv_branch = SimpleSequential(name="att_conv_branch")
    for layer_number, layer_channels in enumerate(att_conv_channels, start=1):
        self.att_conv_branch.add(ConvBlock1d(
            in_channels=width,
            out_channels=layer_channels,
            kernel_size=3,
            strides=1,
            padding=1,
            use_bias=True,
            use_bn=False,
            activation=nn.LeakyReLU(alpha=att_conv_slope),
            data_format=data_format,
            name="att_conv{}".format(layer_number)))
        width = layer_channels

    self.att_fc = DenseBlock(
        in_channels=seq_len,
        out_channels=seq_len,
        use_bias=True,
        use_bn=False,
        activation=nn.Softmax(axis=1),
        data_format=data_format,
        name="att_fc")
class NvpAttExpEncoder(nn.Layer):
    """
    Neural Voice Puppetry Audio-to-Expression encoder.

    Parameters:
    ----------
    audio_features : int
        Number of audio features (characters/sounds).
    audio_window_size : int
        Size of audio window (for time related audio features).
    seq_len : int
        Size of feature window.
    encoder_features : int
        Number of encoder features.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    def __init__(self,
                 audio_features,
                 audio_window_size,
                 seq_len,
                 encoder_features,
                 data_format="channels_last",
                 **kwargs):
        super(NvpAttExpEncoder, self).__init__(**kwargs)
        self.audio_features = audio_features
        self.audio_window_size = audio_window_size
        self.seq_len = seq_len
        self.data_format = data_format
        conv_channels = (32, 32, 64, 64)
        conv_slopes = (0.02, 0.02, 0.2, 0.2)
        fc_channels = (128, 64, encoder_features)
        # A slope of None selects the "tanh" activation for that layer.
        fc_slopes = (0.02, 0.02, None)
        att_conv_channels = (16, 8, 4, 2, 1)
        att_conv_slopes = 0.02

        in_channels = audio_features
        self.conv_branch = SimpleSequential(name="conv_branch")
        for i, (out_channels, slope) in enumerate(zip(conv_channels, conv_slopes)):
            self.conv_branch.add(ConvBlock(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=(3, 1),
                strides=(2, 1),
                padding=(1, 0),
                use_bias=True,
                use_bn=False,
                activation=nn.LeakyReLU(alpha=slope),
                data_format=data_format,
                name="conv{}".format(i + 1)))
            in_channels = out_channels

        self.fc_branch = SimpleSequential(name="fc_branch")
        for i, (out_channels, slope) in enumerate(zip(fc_channels, fc_slopes)):
            activation = nn.LeakyReLU(alpha=slope) if slope is not None else "tanh"
            self.fc_branch.add(DenseBlock(
                in_channels=in_channels,
                out_channels=out_channels,
                use_bias=True,
                use_bn=False,
                activation=activation,
                data_format=data_format,
                name="fc{}".format(i + 1)))
            in_channels = out_channels

        self.att_conv_branch = SimpleSequential(name="att_conv_branch")
        for i, out_channels in enumerate(att_conv_channels):
            self.att_conv_branch.add(ConvBlock1d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=3,
                strides=1,
                padding=1,
                use_bias=True,
                use_bn=False,
                activation=nn.LeakyReLU(alpha=att_conv_slopes),
                data_format=data_format,
                name="att_conv{}".format(i + 1)))
            in_channels = out_channels

        self.att_fc = DenseBlock(
            in_channels=seq_len,
            out_channels=seq_len,
            use_bias=True,
            use_bn=False,
            activation=nn.Softmax(axis=1),
            data_format=data_format,
            name="att_fc")

    def call(self, x, training=None):
        """
        Returns a pair `(x, y)`: `x` is the batch-dot of the per-step features
        with the attention weights, `y` is the feature slice taken at the
        middle step (`seq_len // 2`).

        The `training` flag is forwarded to every sub-branch; previously it
        was accepted but never passed on.
        """
        # NOTE(review): uses the static batch dimension — presumably the batch
        # size must be known when this is traced; confirm against callers.
        batch = x.shape[0]
        batch_seq_len = batch * self.seq_len

        if is_channels_first(self.data_format):
            x = tf.reshape(x, shape=(-1, 1, self.audio_window_size, self.audio_features))
            x = tf.transpose(x, perm=(0, 3, 2, 1))
            x = self.conv_branch(x, training=training)
            x = tf.squeeze(x, axis=-1)
            x = tf.reshape(x, shape=(batch_seq_len, 1, -1))
            x = self.fc_branch(x, training=training)
            x = tf.reshape(x, shape=(batch, self.seq_len, -1))
            x = tf.transpose(x, perm=(0, 2, 1))
            y = x[:, :, (self.seq_len // 2)]
            w = self.att_conv_branch(x, training=training)
            w = tf.squeeze(w, axis=1)
            w = self.att_fc(w, training=training)
            w = tf.expand_dims(w, axis=-1)
        else:
            x = tf.transpose(x, perm=(0, 3, 1, 2))
            x = tf.reshape(x, shape=(-1, 1, self.audio_window_size, self.audio_features))
            x = tf.transpose(x, perm=(0, 2, 3, 1))
            x = tf.transpose(x, perm=(0, 1, 3, 2))
            x = self.conv_branch(x, training=training)
            x = tf.squeeze(x, axis=1)
            x = self.fc_branch(x, training=training)
            x = tf.reshape(x, shape=(batch, self.seq_len, -1))
            y = x[:, (self.seq_len // 2), :]
            w = self.att_conv_branch(x, training=training)
            w = tf.squeeze(w, axis=-1)
            w = self.att_fc(w, training=training)
            w = tf.expand_dims(w, axis=-1)
            # Swap the last two axes so that batch_dot below contracts over
            # the sequence axis, as in the channels-first branch.
            x = tf.transpose(x, perm=(0, 2, 1))

        x = tf.keras.backend.batch_dot(x, w)
        x = tf.squeeze(x, axis=-1)
        return x, y
def __init__(self,
             in_channels_list,
             out_channels_list,
             num_branches,
             num_subblocks,
             data_format="channels_last",
             **kwargs):
    """
    Build the per-branch residual units and the cross-branch fuse layers.

    Parameters:
    ----------
    in_channels_list : list of int
        Number of input channels. The list is copied; the caller's list is
        left untouched.
    out_channels_list : list of int
        Number of output channels.
    num_branches : int
        Number of branches.
    num_subblocks : list of int
        Number of subblock.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    super(HRBlock, self).__init__(**kwargs)
    # Keep a private copy: the entries are overwritten below with the branch
    # output widths, which previously modified the caller's list in place.
    self.in_channels_list = list(in_channels_list)
    self.num_branches = num_branches

    self.branches = SimpleSequential(name="branches")
    for i in range(num_branches):
        layers = SimpleSequential(name="branches/branch{}".format(i + 1))
        in_channels_i = self.in_channels_list[i]
        out_channels_i = out_channels_list[i]
        for j in range(num_subblocks[i]):
            layers.add(ResUnit(
                in_channels=in_channels_i,
                out_channels=out_channels_i,
                strides=1,
                bottleneck=False,
                data_format=data_format,
                name="unit{}".format(j + 1)))
            in_channels_i = out_channels_i
        self.in_channels_list[i] = out_channels_i
        self.branches.add(layers)

    if num_branches > 1:
        # From here on the list holds the widths produced by the branches.
        branch_channels = self.in_channels_list
        self.fuse_layers = SimpleSequential(name="fuse_layers")
        for i in range(num_branches):
            fuse_layer_name = "fuse_layers/fuse_layer{}".format(i + 1)
            fuse_layer = SimpleSequential(name=fuse_layer_name)
            for j in range(num_branches):
                if j > i:
                    # Branch j is upsampled to be combined into branch i.
                    fuse_layer.add(UpSamplingBlock(
                        in_channels=branch_channels[j],
                        out_channels=branch_channels[i],
                        scale_factor=2 ** (j - i),
                        data_format=data_format,
                        name=fuse_layer_name + "/block{}".format(j + 1)))
                elif j == i:
                    fuse_layer.add(Identity(name=fuse_layer_name + "/block{}".format(j + 1)))
                else:
                    # Branch j is reduced with (i - j) stride-2 convolutions;
                    # only the last one changes the width and it has no activation.
                    conv3x3_seq_name = fuse_layer_name + "/block{}_conv3x3_seq".format(j + 1)
                    conv3x3_seq = SimpleSequential(name=conv3x3_seq_name)
                    for k in range(i - j):
                        if k == i - j - 1:
                            conv3x3_seq.add(conv3x3_block(
                                in_channels=branch_channels[j],
                                out_channels=branch_channels[i],
                                strides=2,
                                activation=None,
                                data_format=data_format,
                                name="subblock{}".format(k + 1)))
                        else:
                            conv3x3_seq.add(conv3x3_block(
                                in_channels=branch_channels[j],
                                out_channels=branch_channels[j],
                                strides=2,
                                data_format=data_format,
                                name="subblock{}".format(k + 1)))
                    fuse_layer.add(conv3x3_seq)
            self.fuse_layers.add(fuse_layer)
        self.activ = nn.ReLU()
class HRBlock(nn.Layer):
    """
    HRNet block.

    Parameters:
    ----------
    in_channels_list : list of int
        Number of input channels. The list is copied; the caller's list is
        left untouched.
    out_channels_list : list of int
        Number of output channels.
    num_branches : int
        Number of branches.
    num_subblocks : list of int
        Number of subblock.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    def __init__(self,
                 in_channels_list,
                 out_channels_list,
                 num_branches,
                 num_subblocks,
                 data_format="channels_last",
                 **kwargs):
        super(HRBlock, self).__init__(**kwargs)
        # Keep a private copy: the entries are overwritten below with the
        # branch output widths, which previously modified the caller's list.
        self.in_channels_list = list(in_channels_list)
        self.num_branches = num_branches

        self.branches = SimpleSequential(name="branches")
        for i in range(num_branches):
            layers = SimpleSequential(name="branches/branch{}".format(i + 1))
            in_channels_i = self.in_channels_list[i]
            out_channels_i = out_channels_list[i]
            for j in range(num_subblocks[i]):
                layers.add(ResUnit(
                    in_channels=in_channels_i,
                    out_channels=out_channels_i,
                    strides=1,
                    bottleneck=False,
                    data_format=data_format,
                    name="unit{}".format(j + 1)))
                in_channels_i = out_channels_i
            self.in_channels_list[i] = out_channels_i
            self.branches.add(layers)

        if num_branches > 1:
            # From here on the list holds the widths produced by the branches.
            branch_channels = self.in_channels_list
            self.fuse_layers = SimpleSequential(name="fuse_layers")
            for i in range(num_branches):
                fuse_layer_name = "fuse_layers/fuse_layer{}".format(i + 1)
                fuse_layer = SimpleSequential(name=fuse_layer_name)
                for j in range(num_branches):
                    if j > i:
                        # Branch j is upsampled to be combined into branch i.
                        fuse_layer.add(UpSamplingBlock(
                            in_channels=branch_channels[j],
                            out_channels=branch_channels[i],
                            scale_factor=2 ** (j - i),
                            data_format=data_format,
                            name=fuse_layer_name + "/block{}".format(j + 1)))
                    elif j == i:
                        fuse_layer.add(Identity(name=fuse_layer_name + "/block{}".format(j + 1)))
                    else:
                        # Branch j is reduced with (i - j) stride-2 convolutions;
                        # only the last one changes the width and has no activation.
                        conv3x3_seq_name = fuse_layer_name + "/block{}_conv3x3_seq".format(j + 1)
                        conv3x3_seq = SimpleSequential(name=conv3x3_seq_name)
                        for k in range(i - j):
                            if k == i - j - 1:
                                conv3x3_seq.add(conv3x3_block(
                                    in_channels=branch_channels[j],
                                    out_channels=branch_channels[i],
                                    strides=2,
                                    activation=None,
                                    data_format=data_format,
                                    name="subblock{}".format(k + 1)))
                            else:
                                conv3x3_seq.add(conv3x3_block(
                                    in_channels=branch_channels[j],
                                    out_channels=branch_channels[j],
                                    strides=2,
                                    data_format=data_format,
                                    name="subblock{}".format(k + 1)))
                        fuse_layer.add(conv3x3_seq)
                self.fuse_layers.add(fuse_layer)
            self.activ = nn.ReLU()

    def call(self, x, training=None):
        """
        Apply every branch to its input and, when there is more than one
        branch, fuse all branch outputs into each resolution.

        `x` is a sequence with one tensor per branch; a list with one tensor
        per branch is returned.
        """
        # Work on a shallow copy so the caller's sequence is not overwritten
        # with the branch outputs.
        x = list(x)
        for i in range(self.num_branches):
            x[i] = self.branches[i](x[i], training=training)

        if self.num_branches == 1:
            return x

        x_fuse = []
        for i in range(len(self.fuse_layers)):
            # Entry (i, i) of the fuse table is an Identity, so the branch's
            # own output is added as is.
            y = x[0] if i == 0 else self.fuse_layers[i][0](x[0], training=training)
            for j in range(1, self.num_branches):
                if i == j:
                    y = y + x[j]
                else:
                    y = y + self.fuse_layers[i][j](x[j], training=training)
            x_fuse.append(self.activ(y))

        return x_fuse
class HRNet(tf.keras.Model):
    """
    HRNet model from 'Deep High-Resolution Representation Learning for Visual Recognition,'
    https://arxiv.org/abs/1908.07919.

    Parameters:
    ----------
    channels : list of list of int
        Number of output channels for each stage (one list per stage).
    init_block_channels : int
        Number of output channels for the initial unit.
    init_num_subblocks : int
        Number of subblocks in the initial unit.
    num_modules : list of int
        Number of modules for each stage.
    num_subblocks : list of list of int
        Number of subblocks for each stage (one list per stage).
    in_channels : int, default 3
        Number of input channels.
    in_size : tuple of two ints, default (224, 224)
        Spatial size of the expected input image.
    classes : int, default 1000
        Number of classification classes.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    def __init__(self,
                 channels,
                 init_block_channels,
                 init_num_subblocks,
                 num_modules,
                 num_subblocks,
                 in_channels=3,
                 in_size=(224, 224),
                 classes=1000,
                 data_format="channels_last",
                 **kwargs):
        super(HRNet, self).__init__(**kwargs)
        self.in_size = in_size
        self.classes = classes
        self.data_format = data_format
        # Number of branches of each of the three stages.
        self.branches = [2, 3, 4]

        self.features = SimpleSequential(name="features")
        self.features.add(HRInitBlock(
            in_channels=in_channels,
            out_channels=init_block_channels,
            mid_channels=64,
            num_subblocks=init_num_subblocks,
            data_format=data_format,
            name="init_block"))

        stage_in_channels = [init_block_channels]
        for stage_idx, stage_branches in enumerate(self.branches):
            self.features.add(HRStage(
                in_channels_list=stage_in_channels,
                out_channels_list=channels[stage_idx],
                num_modules=num_modules[stage_idx],
                num_branches=stage_branches,
                num_subblocks=num_subblocks[stage_idx],
                data_format=data_format,
                name="stage{}".format(stage_idx + 1)))
            # The next stage starts from the widths reported by this one.
            stage_in_channels = self.features[-1].in_channels_list

        self.features.add(HRFinalBlock(
            in_channels_list=stage_in_channels,
            out_channels_list=[128, 256, 512, 1024],
            data_format=data_format,
            name="final_block"))
        self.features.add(nn.AveragePooling2D(
            pool_size=7,
            strides=1,
            data_format=data_format,
            name="final_pool"))

        self.output1 = nn.Dense(
            units=classes,
            input_dim=2048,
            name="output1")

    def call(self, x, training=None):
        features = self.features(x, training=training)
        flat = flatten(features, self.data_format)
        return self.output1(flat)
class HRStage(nn.Layer):
    """
    HRNet stage block.

    Parameters:
    ----------
    in_channels_list : list of int
        Number of output channels from the previous layer.
    out_channels_list : list of int
        Number of output channels in the current layer.
    num_modules : int
        Number of modules.
    num_branches : int
        Number of branches.
    num_subblocks : list of int
        Number of subblocks.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    def __init__(self,
                 in_channels_list,
                 out_channels_list,
                 num_modules,
                 num_branches,
                 num_subblocks,
                 data_format="channels_last",
                 **kwargs):
        super(HRStage, self).__init__(**kwargs)
        self.branches = num_branches
        self.in_channels_list = out_channels_list
        in_branches = len(in_channels_list)
        out_branches = len(out_channels_list)

        self.transition = SimpleSequential(name="transition")
        for i in range(out_branches):
            if i < in_branches:
                if out_channels_list[i] != in_channels_list[i]:
                    # Existing branch whose width changes.
                    self.transition.add(conv3x3_block(
                        in_channels=in_channels_list[i],
                        out_channels=out_channels_list[i],
                        strides=1,
                        data_format=data_format,
                        name="transition/block{}".format(i + 1)))
                else:
                    self.transition.add(Identity(name="transition/block{}".format(i + 1)))
            else:
                # New branch: derived from the last previous branch through a
                # chain of stride-2 convolutions; only the last one changes the width.
                conv3x3_seq = SimpleSequential(name="transition/conv3x3_seq{}".format(i + 1))
                for j in range(i + 1 - in_branches):
                    in_channels_i = in_channels_list[-1]
                    out_channels_i = out_channels_list[i] if j == i - in_branches else in_channels_i
                    conv3x3_seq.add(conv3x3_block(
                        in_channels=in_channels_i,
                        out_channels=out_channels_i,
                        strides=2,
                        data_format=data_format,
                        name="subblock{}".format(j + 1)))
                self.transition.add(conv3x3_seq)

        self.layers = SimpleSequential(name="layers")
        for i in range(num_modules):
            self.layers.add(HRBlock(
                in_channels_list=self.in_channels_list,
                out_channels_list=out_channels_list,
                num_branches=num_branches,
                num_subblocks=num_subblocks,
                data_format=data_format,
                name="block{}".format(i + 1)))
            # Track the channel counts produced by the most recent module.
            self.in_channels_list = list(self.layers[-1].in_channels_list)

    def call(self, x, training=None):
        """
        Route the input through the transition layers and then through the
        HRBlock modules.

        `x` is either a single tensor or a list/tuple of per-branch tensors.
        The value produced by `self.layers` for the per-branch list is returned.
        """
        # isinstance (rather than an exact type comparison) so that subclasses
        # of list/tuple are also recognised as per-branch inputs instead of
        # being treated as a single tensor.
        multi_input = isinstance(x, (list, tuple))

        x_list = []
        for j in range(self.branches):
            transition_j = self.transition[j]
            if not isinstance(transition_j, Identity):
                # Non-identity transitions always start from the last input branch.
                source = x[-1] if multi_input else x
                x_list.append(transition_j(source, training=training))
            else:
                x_list.append(x[j] if multi_input else x)

        y_list = self.layers(x_list, training=training)
        return y_list
class IbpBackbone(nn.Layer):
    """
    IBPPose backbone.

    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    activation : function or str or None
        Activation function or name of activation function.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    def __init__(self,
                 in_channels,
                 out_channels,
                 activation,
                 data_format="channels_last",
                 **kwargs):
        super(IbpBackbone, self).__init__(**kwargs)
        self.data_format = data_format
        quarter_channels = out_channels // 4
        half_channels = out_channels // 2

        self.conv1 = conv7x7_block(
            in_channels=in_channels,
            out_channels=quarter_channels,
            strides=2,
            activation=activation,
            data_format=data_format,
            name="conv1")
        self.res1 = IbpResUnit(
            in_channels=quarter_channels,
            out_channels=half_channels,
            activation=activation,
            data_format=data_format,
            name="res1")
        self.pool = MaxPool2d(
            pool_size=2,
            strides=2,
            data_format=data_format,
            name="pool")
        self.res2 = IbpResUnit(
            in_channels=half_channels,
            out_channels=half_channels,
            activation=activation,
            data_format=data_format,
            name="res2")

        # Chain of dilated 3x3 convolutions; the padding equals the dilation.
        self.dilation_branch = SimpleSequential(name="dilation_branch")
        for block_number, rate in enumerate((3, 3, 4, 4, 5, 5), start=1):
            self.dilation_branch.add(conv3x3_block(
                in_channels=half_channels,
                out_channels=half_channels,
                padding=rate,
                dilation=rate,
                activation=activation,
                data_format=data_format,
                name="block{}".format(block_number)))

    def call(self, x, training=None):
        stem = self.conv1(x, training=training)
        stem = self.res1(stem, training=training)
        stem = self.pool(stem, training=training)
        stem = self.res2(stem, training=training)
        dilated = self.dilation_branch(stem, training=training)
        # The output joins the plain and the dilated features along the channel axis.
        return tf.concat([stem, dilated], axis=get_channel_axis(self.data_format))
class HarDUnit(nn.Layer):
    """
    HarDNet unit.

    Parameters:
    ----------
    in_channels_list : list of int
        Number of input channels for each block.
    out_channels_list : list of int
        Number of output channels for each block.
    links_list : list of list of int
        List of indices for each layer.
    use_deptwise : bool
        Whether to use depthwise downsampling.
    use_dropout : bool
        Whether to use dropout module.
    downsampling : bool
        Whether to downsample input.
    activation : str
        Name of activation function.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    def __init__(self,
                 in_channels_list,
                 out_channels_list,
                 links_list,
                 use_deptwise,
                 use_dropout,
                 downsampling,
                 activation,
                 data_format="channels_last",
                 **kwargs):
        super(HarDUnit, self).__init__(**kwargs)
        self.data_format = data_format
        self.links_list = links_list
        self.use_dropout = use_dropout
        self.downsampling = downsampling

        # One block per entry of `links_list`.
        self.blocks = SimpleSequential(name="blocks")
        for block_idx, _ in enumerate(links_list):
            block_name = "block{}".format(block_idx + 1)
            block_in_channels = in_channels_list[block_idx]
            block_out_channels = out_channels_list[block_idx]
            if not use_deptwise:
                block = conv3x3_block(
                    in_channels=block_in_channels,
                    out_channels=block_out_channels,
                    data_format=data_format,
                    name=block_name)
            else:
                block = invdwsconv3x3_block(
                    in_channels=block_in_channels,
                    out_channels=block_out_channels,
                    pw_activation=activation,
                    dw_activation=None,
                    data_format=data_format,
                    name=block_name)
            self.blocks.add(block)

        if self.use_dropout:
            self.dropout = nn.Dropout(
                rate=0.1,
                name="dropout")

        final_channels = out_channels_list[-1]
        self.conv = conv1x1_block(
            in_channels=in_channels_list[-1],
            out_channels=final_channels,
            activation=activation,
            data_format=data_format,
            name="conv")

        if self.downsampling:
            if not use_deptwise:
                self.downsample = MaxPool2d(
                    pool_size=2,
                    strides=2,
                    data_format=data_format,
                    name="downsample")
            else:
                self.downsample = dwconv3x3_block(
                    in_channels=final_channels,
                    out_channels=final_channels,
                    strides=2,
                    activation=None,
                    data_format=data_format,
                    name="downsample")

    def call(self, x, training=None):
        axis = get_channel_axis(self.data_format)

        # history[0] is the unit input; history[k] is the output of block k.
        history = [x]
        for links, block in zip(self.links_list, self.blocks.children):
            linked = [history[link_idx] for link_idx in links]
            block_input = tf.concat(linked, axis=axis) if len(linked) > 1 else linked[0]
            history.append(block(block_input, training=training))

        # Keep the final output and every odd-indexed entry of the history.
        last_idx = len(history) - 1
        kept = [out for idx, out in enumerate(history) if (idx == last_idx) or (idx % 2 == 1)]
        x = tf.concat(kept, axis=axis)

        if self.use_dropout:
            x = self.dropout(x, training=training)
        x = self.conv(x, training=training)
        if self.downsampling:
            x = self.downsample(x, training=training)
        return x
class MobileNet(tf.keras.Model):
    """
    MobileNet model from 'MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications,'
    https://arxiv.org/abs/1704.04861.

    Parameters:
    ----------
    channels : list of list of int
        Number of output channels for each unit; `channels[0][0]` is the
        width of the initial block.
    first_stage_stride : bool
        Whether stride is used at the first stage.
    dw_use_bn : bool, default True
        Whether to use BatchNorm layer (depthwise convolution block).
    dw_activation : function or str or None, default 'relu'
        Activation function after the depthwise convolution block.
    in_channels : int, default 3
        Number of input channels.
    in_size : tuple of two ints, default (224, 224)
        Spatial size of the expected input image.
    classes : int, default 1000
        Number of classification classes.
    data_format : str, default 'channels_last'
        The ordering of the dimensions in tensors.
    """
    def __init__(self,
                 channels,
                 first_stage_stride,
                 dw_use_bn=True,
                 dw_activation="relu",
                 in_channels=3,
                 in_size=(224, 224),
                 classes=1000,
                 data_format="channels_last",
                 **kwargs):
        super(MobileNet, self).__init__(**kwargs)
        self.in_size = in_size
        self.classes = classes
        self.data_format = data_format

        stem_channels = channels[0][0]
        self.features = SimpleSequential(name="features")
        self.features.add(conv3x3_block(
            in_channels=in_channels,
            out_channels=stem_channels,
            strides=2,
            data_format=data_format,
            name="init_block"))

        current_channels = stem_channels
        for stage_number, stage_channels in enumerate(channels[1:], start=1):
            stage = SimpleSequential(name="stage{}".format(stage_number))
            for unit_number, unit_channels in enumerate(stage_channels, start=1):
                # The first unit of a stage downsamples; for the very first
                # stage this depends on `first_stage_stride`.
                if (unit_number == 1) and ((stage_number != 1) or first_stage_stride):
                    strides = 2
                else:
                    strides = 1
                stage.add(dwsconv3x3_block(
                    in_channels=current_channels,
                    out_channels=unit_channels,
                    strides=strides,
                    dw_use_bn=dw_use_bn,
                    dw_activation=dw_activation,
                    data_format=data_format,
                    name="unit{}".format(unit_number)))
                current_channels = unit_channels
            self.features.add(stage)

        self.features.add(nn.AveragePooling2D(
            pool_size=7,
            strides=1,
            data_format=data_format,
            name="final_pool"))

        self.output1 = nn.Dense(
            units=classes,
            input_dim=current_channels,
            name="output1")

    def call(self, x, training=None):
        features = self.features(x, training=training)
        flat = flatten(features, self.data_format)
        return self.output1(flat)