def build_network(input, trainable):
    """Build the hourglass-style pose network on a MobileNetV2 stem.

    Args:
        input: input image tensor (assumed NHWC — confirm with caller).
        trainable: flag forwarded to ``is_trainable`` to toggle train mode.

    Returns:
        Tuple ``(hg_out, l2s)``: the hourglass output tensor, and the list
        of intermediate loss tensors, each upsampled to the stem's spatial
        resolution.
    """
    is_trainable(trainable)

    # Stem: stride-2 conv.  # 128, 112
    stem = convb(input, 3, 3, out_channel_ratio(16), 2, name="Conv2d_0")

    stem = slim.stack(
        stem, inverted_bottleneck,
        [
            (1, out_channel_ratio(16), 0, 3),
            (1, out_channel_ratio(16), 0, 3),
        ],
        scope="Conv2d_1")

    # 64, 56
    stem = slim.stack(
        stem, inverted_bottleneck,
        [
            (up_channel_ratio(6), out_channel_ratio(24), 1, 3),
            (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
            (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
            (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
            (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
        ],
        scope="Conv2d_2")

    # Target spatial size every intermediate loss tensor must match.
    target_size = int(stem.shape[1])

    # Build the hourglass stages recursively.
    # NOTE(review): `l2s` is not defined in this function — it appears to be
    # a module-level list populated as a side effect of hourglass_module();
    # confirm against the enclosing module.
    hg_out = hourglass_module(stem, STAGE_NUM)

    # Upsample each smaller intermediate tensor up to the stem resolution.
    for idx, loss_tensor in enumerate(l2s):
        current_size = int(loss_tensor.shape[1])
        if current_size == target_size:
            continue
        factor = target_size // current_size
        l2s[idx] = upsample(loss_tensor, factor,
                            name="upsample_for_loss_%d" % idx)

    return hg_out, l2s
def build_network(self, input, trainable):
    """Build the hourglass pose network and average its intermediate heatmaps.

    Args:
        input: input image tensor (assumed NHWC — confirm with caller).
        trainable: flag forwarded to ``is_trainable`` to toggle train mode.

    Returns:
        Tuple ``(hg_out, merged_layer)``: the final hourglass output and the
        element-wise average of all intermediate heatmap tensors (each first
        upsampled to the backbone's spatial resolution).
    """
    is_trainable(trainable)
    intermediate_heatmap_layers = []

    # Stem: stride-2 conv.  # 128, 112
    feat = convb(input, 3, 3, out_channel_ratio(16), 2, name="Conv2d_0")

    # Inverted-bottleneck stack: two expansion-1 blocks, then five
    # expansion-6 blocks (args: expansion, out_channels, stride_flag, ksize).
    bottleneck_specs = [
        (1, out_channel_ratio(16), 0, 3),
        (1, out_channel_ratio(16), 0, 3),  # 64, 56
        (up_channel_ratio(6), out_channel_ratio(24), 1, 3),
        (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
        (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
        (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
        (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
    ]
    for expansion, channels, stride_flag, ksize in bottleneck_specs:
        feat = inverted_bottleneck(feat, expansion, channels, stride_flag, ksize)

    # Target spatial size the heatmap layers must be upsampled to.
    base_size = int(feat.shape[1])

    # Build the hourglass stages recursively; intermediate heatmaps are
    # appended to the list passed in.
    hg_out = self.hourglass_module(feat, STAGE_NUM, intermediate_heatmap_layers)

    for idx, heatmap in enumerate(intermediate_heatmap_layers):
        size = int(heatmap.shape[1])
        if size == base_size:
            continue
        factor = base_size // size
        intermediate_heatmap_layers[idx] = upsample(
            heatmap, factor, name="upsample_for_loss_%d" % idx)

    # Average all intermediate heatmaps into a single merged prediction.
    merged_layer = tf.keras.layers.Average()(intermediate_heatmap_layers)
    return hg_out, merged_layer
def build_network(input, trainable):
    """Build a MobileNetV2 backbone feeding a Convolutional Pose Machine.

    The backbone's multi-resolution branches are pooled/upsampled to a
    common spatial size, concatenated, and refined over ``STAGE_NUM`` CPM
    stages, each producing a keypoint heatmap.

    Args:
        input: input image tensor (assumed NHWC — confirm with caller).
        trainable: flag forwarded to ``is_trainable`` to toggle train mode.

    Returns:
        Tuple ``(cpm_out, l2s)``: the last stage's upsampled heatmap and
        the list of every stage's upsampled heatmap (for intermediate
        supervision).
    """
    is_trainable(trainable)

    # Stem: stride-2 conv.
    stem = convb(input, 3, 3, out_channel_ratio(32), 2, name="Conv2d_0")

    with tf.variable_scope('MobilenetV2'):
        # 128, 112
        branch_0 = slim.stack(
            stem, inverted_bottleneck,
            [
                (1, out_channel_ratio(16), 0, 3),
                (1, out_channel_ratio(16), 0, 3),
            ],
            scope="MobilenetV2_part_0")

        # 64, 56
        branch_1 = slim.stack(
            branch_0, inverted_bottleneck,
            [
                (up_channel_ratio(6), out_channel_ratio(24), 1, 3),
                (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(24), 0, 3),
            ],
            scope="MobilenetV2_part_1")

        # 32, 28
        branch_2 = slim.stack(
            branch_1, inverted_bottleneck,
            [
                (up_channel_ratio(6), out_channel_ratio(32), 1, 3),
                (up_channel_ratio(6), out_channel_ratio(32), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(32), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(32), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(32), 0, 3),
            ],
            scope="MobilenetV2_part_2")

        # 16, 14
        branch_3 = slim.stack(
            branch_2, inverted_bottleneck,
            [
                (up_channel_ratio(6), out_channel_ratio(64), 1, 3),
                (up_channel_ratio(6), out_channel_ratio(64), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(64), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(64), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(64), 0, 3),
            ],
            scope="MobilenetV2_part_3")

        # 8, 7
        branch_4 = slim.stack(
            branch_3, inverted_bottleneck,
            [
                (up_channel_ratio(6), out_channel_ratio(96), 1, 3),
                (up_channel_ratio(6), out_channel_ratio(96), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(96), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(96), 0, 3),
                (up_channel_ratio(6), out_channel_ratio(96), 0, 3),
            ],
            scope="MobilenetV2_part_4")

        # Bring every branch to branch_2's resolution and fuse on channels.
        fused = tf.concat(
            [
                max_pool(branch_0, 4, 4, 4, 4, name="mv2_0_max_pool"),
                max_pool(branch_1, 2, 2, 2, 2, name="mv2_1_max_pool"),
                branch_2,
                upsample(branch_3, 2, name="mv2_3_upsample"),
                upsample(branch_4, 4, name="mv2_4_upsample"),
            ],
            axis=3)

    with tf.variable_scope("Convolutional_Pose_Machine"):
        l2s = []
        prev_stage = None
        for stage_number in range(STAGE_NUM):
            # Later stages also see the previous stage's raw heatmap.
            if prev_stage is None:
                stage_input = fused
            else:
                stage_input = tf.concat([fused, prev_stage], axis=3)

            # Stage 0 uses a smaller kernel and a wider head than the rest.
            kernel_size, head_channels = (3, 512) if stage_number == 0 else (7, 128)

            stage_net = slim.stack(
                stage_input, inverted_bottleneck,
                [
                    (2, out_channel_cpm(32), 0, kernel_size),
                    (up_channel_ratio(4), out_channel_cpm(32), 0, kernel_size),
                    (up_channel_ratio(4), out_channel_cpm(32), 0, kernel_size),
                ],
                scope="stage_%d_mv2" % stage_number)

            stage_net = slim.stack(
                stage_net, separable_conv,
                [
                    (out_channel_ratio(head_channels), 1, 1),
                    (N_KPOINTS, 1, 1),
                ],
                scope="stage_%d_mv1" % stage_number)

            prev_stage = stage_net
            cpm_out = upsample(stage_net, 4, "stage_%d_out" % stage_number)
            l2s.append(cpm_out)

    return cpm_out, l2s