class Init_stage(Model):
    """Initial CPM prediction stage: two parallel separable-conv branches that
    map backbone features to confidence maps and part-affinity-field (PAF)
    maps."""

    def __init__(self, n_confmaps=19, n_pafmaps=38, in_channels=704, data_format="channels_first"):
        # BUG FIX: the original omitted super().__init__(); every other Model
        # subclass in this file calls it, and skipping it breaks layer
        # registration in the Model base class.
        super().__init__()
        self.n_confmaps = n_confmaps
        self.n_pafmaps = n_pafmaps
        self.in_channels = in_channels
        self.data_format = data_format
        # conf block
        self.conf_block = self._make_branch(self.n_confmaps)
        # paf block
        self.paf_block = self._make_branch(self.n_pafmaps)

    def _make_branch(self, out_maps):
        """Build one prediction branch: three 3x3 and one 1x1 separable convs
        with ReLU, then a linear (act=None) 1x1 separable conv projecting to
        `out_maps` channels."""
        return LayerList([
            separable_block(n_filter=128, in_channels=self.in_channels, filter_size=(3, 3), strides=(1, 1),
                            act=tf.nn.relu, data_format=self.data_format),
            separable_block(n_filter=128, in_channels=128, filter_size=(3, 3), strides=(1, 1),
                            act=tf.nn.relu, data_format=self.data_format),
            separable_block(n_filter=128, in_channels=128, filter_size=(3, 3), strides=(1, 1),
                            act=tf.nn.relu, data_format=self.data_format),
            separable_block(n_filter=512, in_channels=128, filter_size=(1, 1), strides=(1, 1),
                            act=tf.nn.relu, data_format=self.data_format),
            separable_block(n_filter=out_maps, in_channels=512, filter_size=(1, 1), strides=(1, 1),
                            act=None, data_format=self.data_format),
        ])

    def forward(self, x):
        """Return (conf_map, paf_map) computed from the shared input features."""
        conf_map = self.conf_block.forward(x)
        paf_map = self.paf_block.forward(x)
        return conf_map, paf_map
class Basic_block(Model):
    """Residual block: 1x1 -> 3x3 -> 1x1 convs with 4x channel expansion.

    NOTE(review): despite the name, this is a ResNet *bottleneck* layout
    (three convs, 4x expansion), not the two-conv "basic" block.
    """

    def __init__(self, in_channels=64, n_filter=64, strides=(1, 1), data_format="channels_first"):
        super().__init__()
        self.in_channels = in_channels
        self.n_filter = n_filter
        self.strides = strides
        self.data_format = data_format
        # Projection shortcut whenever the residual sum would otherwise
        # mismatch in stride or channel count.
        self.downsample = None
        if self.strides != (1, 1) or self.in_channels != 4 * self.n_filter:
            self.downsample = LayerList([
                Conv2d(n_filter=4 * self.n_filter, in_channels=self.in_channels, filter_size=(1, 1),
                       strides=self.strides, b_init=None, data_format=self.data_format),
                BatchNorm2d(is_train=True, num_features=4 * self.n_filter, data_format=self.data_format)
            ])
        self.main_block = LayerList([
            Conv2d(n_filter=self.n_filter, in_channels=self.in_channels, filter_size=(1, 1), strides=(1, 1),
                   b_init=None, data_format=self.data_format),
            BatchNorm2d(is_train=True, num_features=self.n_filter, act=tf.nn.relu, data_format=self.data_format),
            Conv2d(n_filter=self.n_filter, in_channels=self.n_filter, filter_size=(3, 3), strides=self.strides,
                   b_init=None, data_format=self.data_format),
            BatchNorm2d(is_train=True, num_features=self.n_filter, act=tf.nn.relu, data_format=self.data_format),
            Conv2d(n_filter=4 * self.n_filter, in_channels=self.n_filter, filter_size=(1, 1), strides=(1, 1),
                   b_init=None, data_format=self.data_format),
            BatchNorm2d(is_train=True, num_features=4 * self.n_filter, data_format=self.data_format)
        ])

    def forward(self, x):
        """Return relu(main_block(x) + shortcut(x))."""
        res = x
        x = self.main_block.forward(x)
        # IDIOM FIX: identity comparison with None uses `is not`, not `!=`.
        if self.downsample is not None:
            res = self.downsample.forward(res)
        return tf.nn.relu(x + res)
class Res_block(Model):
    """Two-conv residual block; when down-sampling, a strided conv/BN pair
    projects the shortcut so the residual sum stays shape-compatible."""

    def __init__(self, n_filter, in_channels, strides=(1, 1), is_down_sample=False, data_format="channels_first"):
        super().__init__()
        self.data_format = data_format
        self.is_down_sample = is_down_sample
        body = [
            Conv2d(n_filter=n_filter, in_channels=in_channels, filter_size=(3, 3), strides=strides,
                   b_init=None, data_format=self.data_format),
            BatchNorm2d(decay=0.9, act=tf.nn.relu, is_train=True, num_features=n_filter,
                        data_format=self.data_format),
            Conv2d(n_filter=n_filter, in_channels=n_filter, filter_size=(3, 3), strides=(1, 1),
                   b_init=None, data_format=self.data_format),
            BatchNorm2d(decay=0.9, is_train=True, num_features=n_filter, data_format=self.data_format),
        ]
        self.main_block = LayerList(body)
        if self.is_down_sample:
            self.down_sample = LayerList([
                Conv2d(n_filter=n_filter, in_channels=in_channels, filter_size=(3, 3), strides=strides,
                       b_init=None, data_format=self.data_format),
                BatchNorm2d(decay=0.9, is_train=True, num_features=n_filter, data_format=self.data_format),
            ])

    def forward(self, x):
        """relu(main path + (possibly projected) shortcut)."""
        shortcut = self.down_sample.forward(x) if self.is_down_sample else x
        out = self.main_block.forward(x)
        return tf.nn.relu(out + shortcut)
class Discriminator(Model):
    """Thin discriminator wrapper around a caller-supplied list of layers."""

    def __init__(self, layer_list, data_format="channels_first"):
        # BUG FIX: the original omitted super().__init__(), which breaks
        # layer registration in the Model base class (all sibling Model
        # subclasses in this file call it).
        super().__init__()
        self.data_format = data_format
        self.main_block = LayerList(layer_list)

    def forward(self, x):
        """Run the wrapped layer stack and return its raw logits."""
        return self.main_block.forward(x)

    def cal_loss(self, x, labels):
        """Element-wise sigmoid cross-entropy between logits `x` and `labels`."""
        return tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=x)
class InvertedResidual(Model):
    """MobileNetV2-style inverted residual block with linear bottleneck."""

    def __init__(self, n_filter=128, in_channels=128, strides=(1, 1), exp_ratio=6, data_format="channels_first"):
        super().__init__()
        self.n_filter = n_filter
        self.in_channels = in_channels
        self.strides = strides
        self.exp_ratio = exp_ratio
        self.data_format = data_format
        self.hidden_dim = self.exp_ratio * self.in_channels
        # Residual (identity) connection only when spatial size and channel
        # count are both preserved.
        self.identity = (self.strides == (1, 1) and self.in_channels == self.n_filter)
        if self.exp_ratio == 1:
            # No expansion conv: depthwise -> BN/relu6 -> linear pointwise -> BN.
            self.main_block = LayerList([
                DepthwiseConv2d(in_channels=self.hidden_dim, filter_size=(3, 3), strides=self.strides,
                                b_init=None, data_format=self.data_format),
                BatchNorm2d(num_features=self.hidden_dim, is_train=True, act=tf.nn.relu6,
                            data_format=self.data_format),
                Conv2d(n_filter=self.n_filter, in_channels=self.hidden_dim, filter_size=(1, 1), strides=(1, 1),
                       b_init=None, data_format=self.data_format),
                BatchNorm2d(num_features=self.n_filter, is_train=True, act=None, data_format=self.data_format)
            ])
        else:
            # Expansion pointwise -> BN/relu6 -> depthwise -> BN/relu6 ->
            # linear projection pointwise -> BN.
            self.main_block = LayerList([
                Conv2d(n_filter=self.hidden_dim, in_channels=self.in_channels, filter_size=(1, 1), strides=(1, 1),
                       b_init=None, data_format=self.data_format),
                BatchNorm2d(num_features=self.hidden_dim, is_train=True, act=tf.nn.relu6,
                            data_format=self.data_format),
                DepthwiseConv2d(in_channels=self.hidden_dim, filter_size=(3, 3), strides=self.strides,
                                b_init=None, data_format=self.data_format),
                BatchNorm2d(num_features=self.hidden_dim, is_train=True, act=tf.nn.relu6,
                            data_format=self.data_format),
                # BUG FIX: the projection conv previously kept its bias and had
                # no trailing BatchNorm, unlike the exp_ratio == 1 branch of
                # this same class and the MobileNetV2 linear-bottleneck design;
                # both branches now agree.
                Conv2d(n_filter=self.n_filter, in_channels=self.hidden_dim, filter_size=(1, 1), strides=(1, 1),
                       b_init=None, data_format=self.data_format),
                BatchNorm2d(num_features=self.n_filter, is_train=True, act=None, data_format=self.data_format)
            ])

    def forward(self, x):
        """Main path output, plus the input itself when the block is identity-compatible."""
        out = self.main_block.forward(x)
        return x + out if self.identity else out
class Discriminator(Model):
    """Discriminator: a strided conv stack that shrinks the feature map until
    both sides are < 4, then three dense layers producing one real/fake logit
    per sample."""

    def __init__(self, feature_hin, feature_win, in_channels, data_format="channels_first"):
        super().__init__()
        self.data_format = data_format
        self.feature_hin = feature_hin
        self.feature_win = feature_win
        self.in_channels = in_channels
        self.layer_num = 5
        self.n_filter = 256
        # construct Model
        layer_list = []
        last_channels = self.in_channels
        dis_hin, dis_win = self.feature_hin, self.feature_win
        for layer_idx in range(0, self.layer_num):
            strides = (1, 1)
            # Halve (round-up) the tracked spatial size while either side >= 4,
            # so the Flatten/Dense input size below matches the conv output.
            if dis_hin >= 4 or dis_win >= 4:
                strides = (2, 2)
                dis_hin, dis_win = (dis_hin + 1) // 2, (dis_win + 1) // 2
            layer_list += [
                Conv2d(n_filter=self.n_filter, in_channels=last_channels, strides=strides, act=tf.nn.relu,
                       data_format=data_format, name=f"dis_conv_{layer_idx}")
            ]
            last_channels = self.n_filter
        layer_list.append(Flatten(name="Flatten"))
        layer_list.append(
            Dense(n_units=4096, in_channels=dis_hin * dis_win * self.n_filter, act=tf.nn.relu, name="fc1"))
        layer_list.append(Dense(n_units=1000, in_channels=4096, act=tf.nn.relu, name="fc2"))
        layer_list.append(Dense(n_units=1, in_channels=1000, act=None, name="fc3"))
        self.main_block = LayerList(layer_list)

    def forward(self, x):
        """Return raw (pre-sigmoid) logits of shape [batch, 1]."""
        return self.main_block.forward(x)

    def cal_loss(self, x, label):
        """Summed sigmoid cross-entropy of logits `x` against an all-ones
        (truthy label -> "real") or all-zeros (falsy -> "fake") target.

        BUG FIX: the original assigned gt_label only when label == True or
        label == False and raised UnboundLocalError for any other value; the
        target is now selected by truthiness.
        """
        label_shape = [x.shape[0], 1]
        gt_label = tf.ones(shape=label_shape) if label else tf.zeros(shape=label_shape)
        loss = tf.reduce_sum(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=gt_label, logits=x))
        return loss
class MobilenetV1_backbone(Model):
    """MobileNetV1 feature extractor ending at 1024 channels.

    scale_size == 8 keeps the two late stages at stride 1 (output stride /8);
    any other value lets them stride by 2.
    """

    def __init__(self, scale_size=8, data_format="channels_last"):
        # BUG FIX: the default data_format was the misspelled "channel_last",
        # which is not a value TensorFlow/TensorLayer layers accept.
        super().__init__()
        self.data_format = data_format
        self.scale_size = scale_size
        if self.scale_size == 8:
            strides = (1, 1)
        else:
            strides = (2, 2)
        self.out_channels = 1024
        # (n_filter, in_channels, strides) for each depthwise-separable block,
        # in network order.
        separable_cfgs = [
            (64, 32, (1, 1)),
            (128, 64, (2, 2)),
            (128, 128, (1, 1)),
            (256, 128, (2, 2)),
            (256, 256, (1, 1)),
            (512, 256, strides),
            (512, 512, (1, 1)),
            (512, 512, (1, 1)),
            (512, 512, (1, 1)),
            (512, 512, (1, 1)),
            (512, 512, (1, 1)),
            (1024, 512, strides),
            (1024, 1024, (1, 1)),
        ]
        self.layer_list = []
        self.layer_list += self.conv_block(n_filter=32, in_channels=3, filter_size=(3, 3), strides=(2, 2))
        for n_filter, in_channels, block_strides in separable_cfgs:
            self.layer_list += self.separable_conv_block(n_filter=n_filter, in_channels=in_channels,
                                                         filter_size=(3, 3), strides=block_strides)
        self.main_block = LayerList(self.layer_list)

    def forward(self, x):
        """Run the whole backbone stack."""
        return self.main_block.forward(x)

    def conv_block(self, n_filter=32, in_channels=3, filter_size=(3, 3), strides=(1, 1), padding="SAME"):
        """Conv + BN(ReLU), returned as a plain list of layers.

        BUG FIX: these helpers previously wrapped the result in LayerList, but
        __init__ consumes them with `self.layer_list += ...`, which needs an
        iterable list of layers; return the list itself.
        """
        layer_list = []
        layer_list.append(Conv2d(n_filter=n_filter, in_channels=in_channels, filter_size=filter_size,
                                 strides=strides, data_format=self.data_format, padding=padding))
        layer_list.append(BatchNorm2d(num_features=n_filter, is_train=True, act=tf.nn.relu,
                                      data_format=self.data_format))
        return layer_list

    def separable_conv_block(self, n_filter=32, in_channels=3, filter_size=(3, 3), strides=(1, 1)):
        """Depthwise conv + BN(ReLU) + 1x1 pointwise conv + BN(ReLU), as a plain list."""
        layer_list = []
        layer_list.append(DepthwiseConv2d(in_channels=in_channels, filter_size=filter_size, strides=strides,
                                          data_format=self.data_format))
        layer_list.append(BatchNorm2d(num_features=in_channels, is_train=True, act=tf.nn.relu,
                                      data_format=self.data_format))
        layer_list.append(Conv2d(n_filter=n_filter, in_channels=in_channels, filter_size=(1, 1), strides=(1, 1),
                                 data_format=self.data_format))
        layer_list.append(BatchNorm2d(num_features=n_filter, is_train=True, act=tf.nn.relu,
                                      data_format=self.data_format))
        return layer_list
class vgg19_backbone(Model):
    """VGG19 convolutional feature extractor (512 output channels).

    scale_size == 8 keeps the last stage at stride 1; scale_size == 32 adds
    the fourth max-pool and lets the configurable stride apply.
    """

    def __init__(self, in_channels=3, scale_size=8, data_format="channels_first"):
        super().__init__()
        self.in_channels = in_channels
        self.data_format = data_format
        self.scale_size = scale_size
        strides = (1, 1) if self.scale_size == 8 else (2, 2)
        self.out_channels = 512
        # "M" marks a 2x2/2 max-pool; a bare int is a 3x3 stride-1 ReLU conv
        # with that many output channels; a (channels, strides) tuple carries
        # an explicit stride.
        plan = [64, 64, "M", 128, 128, "M", 256, 256, 256, 256, "M",
                512, 512, 512, 512]
        if self.scale_size == 32:
            plan.append("M")
        # Final conv group: the second conv carries the configurable stride.
        plan += [512, (512, strides), 512, 512]
        self.layer_list = []
        last_channels = self.in_channels
        for item in plan:
            if item == "M":
                self.layer_list.append(
                    MaxPool2d(filter_size=(2, 2), strides=(2, 2), data_format=self.data_format))
                continue
            n_filter, conv_strides = item if isinstance(item, tuple) else (item, (1, 1))
            self.layer_list.append(
                self.conv_block(n_filter=n_filter, in_channels=last_channels, filter_size=(3, 3),
                                strides=conv_strides, act=tf.nn.relu))
            last_channels = n_filter
        self.main_block = LayerList(self.layer_list)

    def forward(self, x):
        """Run the full conv stack."""
        return self.main_block.forward(x)

    def conv_block(self, n_filter=32, in_channels=3, filter_size=(3, 3), strides=(1, 1), act=None, padding="SAME"):
        """Single Conv2d with this backbone's data_format."""
        return Conv2d(n_filter=n_filter, in_channels=in_channels, filter_size=filter_size,
                      strides=strides, act=act, data_format=self.data_format, padding=padding)
class PoseProposal(Model):
    """Pose Proposal Network: backbone + three head layers predicting, per
    output-grid cell and keypoint class: confidence (pc), predicted IoU (pi),
    box center/size (px, py, pw, ph) and limb-edge confidences (pe)."""

    def __init__(self, K_size=18, L_size=17, win=384, hin=384, wout=12, hout=12, wnei=9, hnei=9,
                 lmd_rsp=0.25, lmd_iou=1, lmd_coor=5, lmd_size=5, lmd_limb=0.5,
                 backbone=None, data_format="channels_first"):
        super().__init__()
        # construct params
        self.K = K_size        # number of keypoint classes
        self.L = L_size        # number of limb classes
        self.win = win         # input image width
        self.hin = hin         # input image height
        self.wout = wout       # output grid width
        self.hout = hout       # output grid height
        self.hnei = hnei       # limb-search neighborhood height
        self.wnei = wnei       # limb-search neighborhood width
        self.n_pos = K_size
        self.lmd_rsp = lmd_rsp      # response loss weight
        self.lmd_iou = lmd_iou      # IoU loss weight
        self.lmd_coor = lmd_coor    # coordinate loss weight
        self.lmd_size = lmd_size    # size loss weight
        self.lmd_limb = lmd_limb    # limb loss weight
        self.data_format = data_format
        # 6 scalars per keypoint + one edge score per (limb, neighbor offset)
        self.output_dim = 6 * self.K + self.hnei * self.wnei * self.L
        # construct networks
        if backbone is None:
            self.backbone = self.Resnet_18(n_filter=512, in_channels=3, data_format=data_format)
        else:
            self.backbone = backbone(scale_size=32, data_format=self.data_format)
        self.add_layer_1 = LayerList([
            Conv2d(n_filter=512, in_channels=self.backbone.out_channels, filter_size=(3, 3), strides=(1, 1),
                   data_format=self.data_format),
            BatchNorm2d(decay=0.9, act=lambda x: tl.act.leaky_relu(x, alpha=0.1), is_train=True,
                        num_features=512, data_format=self.data_format)
        ])
        self.add_layer_2 = LayerList([
            Conv2d(n_filter=512, in_channels=512, filter_size=(3, 3), strides=(1, 1),
                   data_format=self.data_format),
            BatchNorm2d(decay=0.9, act=lambda x: tl.act.leaky_relu(x, alpha=0.1), is_train=True,
                        num_features=512, data_format=self.data_format)
        ])
        self.add_layer_3 = Conv2d(n_filter=self.output_dim, in_channels=512, filter_size=(1, 1), strides=(1, 1),
                                  data_format=self.data_format)

    @tf.function
    def forward(self, x, is_train=False):
        """Run the network and split the raw head output into its components.

        Returns (pc, pi, px, py, pw, ph, pe); coordinates are restored to
        input-pixel units unless is_train is True.
        """
        x = self.backbone.forward(x)
        x = self.add_layer_1.forward(x)
        x = self.add_layer_2.forward(x)
        x = self.add_layer_3.forward(x)
        if self.data_format == "channels_first":
            pc = x[:, 0:self.K, :, :]
            pi = x[:, self.K:2 * self.K, :, :]
            px = x[:, 2 * self.K:3 * self.K, :, :]
            py = x[:, 3 * self.K:4 * self.K, :, :]
            pw = x[:, 4 * self.K:5 * self.K, :, :]
            ph = x[:, 5 * self.K:6 * self.K, :, :]
            pe = tf.reshape(x[:, 6 * self.K:, :, :],
                            [-1, self.L, self.wnei, self.hnei, self.wout, self.hout])
        else:
            pc = x[:, :, :, 0:self.K]
            pi = x[:, :, :, self.K:2 * self.K]
            px = x[:, :, :, 2 * self.K:3 * self.K]
            py = x[:, :, :, 3 * self.K:4 * self.K]
            pw = x[:, :, :, 4 * self.K:5 * self.K]
            ph = x[:, :, :, 5 * self.K:6 * self.K]
            # NOTE(review): this reshape ignores the explicit H/W grouping of
            # the NHWC layout — verify it mirrors the channels_first ordering.
            pe = tf.reshape(x[:, :, :, 6 * self.K:],
                            [-1, self.wnei, self.hnei, self.wout, self.hout, self.L])
        if not is_train:
            px, py, pw, ph = self.restore_coor(px, py, pw, ph)
        return pc, pi, px, py, pw, ph, pe

    @tf.function
    def infer(self, x):
        """Inference entry point: forward pass with restored pixel coordinates."""
        pc, pi, px, py, pw, ph, pe = self.forward(x, is_train=False)
        return pc, pi, px, py, pw, ph, pe

    def restore_coor(self, x, y, w, h):
        """Map cell-relative predictions back to input-pixel units: centers are
        offset by the cell grid and scaled by the cell size; sizes are squared
        and scaled by the input dimensions."""
        grid_size_x = self.win / self.wout
        grid_size_y = self.hin / self.hout
        grid_x, grid_y = tf.meshgrid(np.arange(self.wout).astype(np.float32),
                                     np.arange(self.hout).astype(np.float32))
        if self.data_format == "channels_last":
            # BUG FIX: the original indexed the *scalar* cell sizes
            # (grid_size_x[:, :, np.newaxis]), which raises TypeError; it is
            # the (hout, wout) grid tensors that need a trailing axis so they
            # broadcast against NHWC tensors of shape [B, H, W, K].
            grid_x = grid_x[:, :, np.newaxis]
            grid_y = grid_y[:, :, np.newaxis]
        rx = (x + grid_x) * grid_size_x
        ry = (y + grid_y) * grid_size_y
        rw = (w**2) * self.win
        rh = (h**2) * self.hin
        return rx, ry, rw, rh

    def cal_iou(self, bbx1, bbx2):
        """Element-wise IoU of two center-format (x, y, w, h) box tensors.
        A small epsilon keeps the division finite for empty unions."""
        # input x,y are the center of bbx
        x1, y1, w1, h1 = bbx1
        x2, y2, w2, h2 = bbx2
        area1 = w1 * h1
        area2 = w2 * h2
        inter_x = tf.nn.relu(tf.minimum(x1 + w1 / 2, x2 + w2 / 2) - tf.maximum(x1 - w1 / 2, x2 - w2 / 2))
        inter_y = tf.nn.relu(tf.minimum(y1 + h1 / 2, y2 + h2 / 2) - tf.maximum(y1 - h1 / 2, y2 - h2 / 2))
        inter_area = inter_x * inter_y
        union_area = area1 + area2 - inter_area + 1e-6
        return inter_area / union_area

    def cal_loss(self, delta, tx, ty, tw, th, te, te_mask, pc, pi, px, py, pw, ph, pe):
        """Weighted squared-error losses of the PPN: response, IoU, coordinate,
        size and limb terms. `delta`/`te_mask` gate cells that carry ground
        truth. Returns the five loss terms separately."""
        rtx, rty, rtw, rth = self.restore_coor(tx, ty, tw, th)
        rx, ry, rw, rh = self.restore_coor(px, py, pw, ph)
        # target IoU: overlap between restored ground-truth and predicted boxes
        ti = self.cal_iou((rtx, rty, rtw, rth), (rx, ry, rw, rh))
        loss_rsp = self.lmd_rsp * tf.reduce_mean(
            tf.reduce_sum((delta - pc)**2, axis=[1, 2, 3]))
        loss_iou = self.lmd_iou * tf.reduce_mean(
            tf.reduce_sum(delta * ((ti - pi)**2), axis=[1, 2, 3]))
        loss_coor = self.lmd_coor * tf.reduce_mean(
            tf.reduce_sum(delta * ((tx - px)**2 + (ty - py)**2), axis=[1, 2, 3]))
        loss_size = self.lmd_size * tf.reduce_mean(
            tf.reduce_sum(delta * ((tw - pw)**2 + (th - ph)**2), axis=[1, 2, 3]))
        loss_limb = self.lmd_limb * tf.reduce_mean(
            tf.reduce_sum(te_mask * ((te - pe)**2), axis=[1, 2, 3, 4, 5]))
        return loss_rsp, loss_iou, loss_coor, loss_size, loss_limb

    class Resnet_18(Model):
        """ResNet-18 backbone (conv1 through conv5_1) used when no backbone is supplied."""

        def __init__(self, n_filter=512, in_channels=3, data_format="channels_first"):
            super().__init__()
            self.data_format = data_format
            self.out_channels = n_filter
            self.conv1 = Conv2d(n_filter=64, in_channels=in_channels, filter_size=(7, 7), strides=(2, 2),
                                b_init=None, data_format=self.data_format)
            self.bn1 = BatchNorm2d(decay=0.9, act=tf.nn.relu, is_train=True, num_features=64,
                                   data_format=self.data_format)
            self.maxpool = MaxPool2d(filter_size=(3, 3), strides=(2, 2), data_format=self.data_format)
            self.res_block_2_1 = self.Res_block(n_filter=64, in_channels=64, strides=(1, 1),
                                                is_down_sample=False, data_format=self.data_format)
            self.res_block_2_2 = self.Res_block(n_filter=64, in_channels=64, strides=(1, 1),
                                                is_down_sample=False, data_format=self.data_format)
            self.res_block_3_1 = self.Res_block(n_filter=128, in_channels=64, strides=(2, 2),
                                                is_down_sample=True, data_format=self.data_format)
            self.res_block_3_2 = self.Res_block(n_filter=128, in_channels=128, strides=(1, 1),
                                                is_down_sample=False, data_format=self.data_format)
            self.res_block_4_1 = self.Res_block(n_filter=256, in_channels=128, strides=(2, 2),
                                                is_down_sample=True, data_format=self.data_format)
            self.res_block_4_2 = self.Res_block(n_filter=256, in_channels=256, strides=(1, 1),
                                                is_down_sample=False, data_format=self.data_format)
            self.res_block_5_1 = self.Res_block(n_filter=n_filter, in_channels=256, strides=(2, 2),
                                                is_down_sample=True, data_format=self.data_format)

        def forward(self, x):
            """Chain stem + residual blocks."""
            x = self.conv1.forward(x)
            x = self.bn1.forward(x)
            x = self.maxpool.forward(x)
            x = self.res_block_2_1.forward(x)
            x = self.res_block_2_2.forward(x)
            x = self.res_block_3_1.forward(x)
            x = self.res_block_3_2.forward(x)
            x = self.res_block_4_1.forward(x)
            x = self.res_block_4_2.forward(x)
            x = self.res_block_5_1.forward(x)
            return x

    class Res_block(Model):
        """Two-conv residual block with optional strided projection shortcut."""

        def __init__(self, n_filter, in_channels, strides=(1, 1), is_down_sample=False,
                     data_format="channels_first"):
            super().__init__()
            self.data_format = data_format
            self.is_down_sample = is_down_sample
            self.main_block = LayerList([
                Conv2d(n_filter=n_filter, in_channels=in_channels, filter_size=(3, 3), strides=strides,
                       b_init=None, data_format=self.data_format),
                BatchNorm2d(decay=0.9, act=tf.nn.relu, is_train=True, num_features=n_filter,
                            data_format=self.data_format),
                Conv2d(n_filter=n_filter, in_channels=n_filter, filter_size=(3, 3), strides=(1, 1),
                       b_init=None, data_format=self.data_format),
                BatchNorm2d(decay=0.9, is_train=True, num_features=n_filter, data_format=self.data_format),
            ])
            if self.is_down_sample:
                self.down_sample = LayerList([
                    Conv2d(n_filter=n_filter, in_channels=in_channels, filter_size=(3, 3), strides=strides,
                           b_init=None, data_format=self.data_format),
                    BatchNorm2d(decay=0.9, is_train=True, num_features=n_filter, data_format=self.data_format)
                ])

        def forward(self, x):
            """relu(main path + (possibly projected) shortcut)."""
            res = x
            x = self.main_block.forward(x)
            if self.is_down_sample:
                res = self.down_sample.forward(res)
            return tf.nn.relu(x + res)
class OpenPose(Model):
    """OpenPose network: backbone -> CPM feature stage -> init stage -> five
    refinement stages, each emitting confidence maps and PAF maps."""

    def __init__(self, n_pos=19, n_limbs=19, num_channels=128, hin=368, win=368, hout=46, wout=46,
                 backbone=None, pretrained_backbone=True, data_format="channels_first"):
        super().__init__()
        self.num_channels = num_channels
        self.n_pos = n_pos
        self.n_limbs = n_limbs
        self.n_confmaps = n_pos
        self.n_pafmaps = 2 * n_limbs
        self.hin = hin
        self.win = win
        self.hout = hout
        self.wout = wout
        self.data_format = data_format
        self.pretrained_backbone = pretrained_backbone
        self.concat_dim = 1 if self.data_format == "channels_first" else -1
        # back bone configure
        if backbone is None:
            self.backbone = self.vgg19(in_channels=3, pretrained=self.pretrained_backbone,
                                       data_format=self.data_format)
        else:
            self.backbone = backbone(scale_size=8, data_format=self.data_format)
        self.cpm_stage = LayerList([
            Conv2d(n_filter=256, in_channels=self.backbone.out_channels, filter_size=(3, 3), strides=(1, 1),
                   padding="SAME", act=tf.nn.relu, data_format=self.data_format),
            Conv2d(n_filter=128, in_channels=256, filter_size=(3, 3), strides=(1, 1),
                   padding="SAME", act=tf.nn.relu, data_format=self.data_format)
        ])
        # init stage
        self.init_stage = self.Init_stage(n_confmaps=self.n_confmaps, n_pafmaps=self.n_pafmaps,
                                          in_channels=128, data_format=self.data_format)
        # five refinement stages, each fed [features, conf, paf] concatenated
        refine_in = self.n_confmaps + self.n_pafmaps + 128
        self.refinement_stage_1 = self.Refinement_stage(n_confmaps=self.n_confmaps, n_pafmaps=self.n_pafmaps,
                                                        in_channels=refine_in, data_format=self.data_format)
        self.refinement_stage_2 = self.Refinement_stage(n_confmaps=self.n_confmaps, n_pafmaps=self.n_pafmaps,
                                                        in_channels=refine_in, data_format=self.data_format)
        self.refinement_stage_3 = self.Refinement_stage(n_confmaps=self.n_confmaps, n_pafmaps=self.n_pafmaps,
                                                        in_channels=refine_in, data_format=self.data_format)
        self.refinement_stage_4 = self.Refinement_stage(n_confmaps=self.n_confmaps, n_pafmaps=self.n_pafmaps,
                                                        in_channels=refine_in, data_format=self.data_format)
        self.refinement_stage_5 = self.Refinement_stage(n_confmaps=self.n_confmaps, n_pafmaps=self.n_pafmaps,
                                                        in_channels=refine_in, data_format=self.data_format)

    @tf.function
    def forward(self, x, is_train=False):
        """Run all stages. Returns the last stage's (conf, paf); when is_train
        is True, also the per-stage lists for the multi-stage loss."""
        conf_list = []
        paf_list = []
        # backbone feature extract
        vgg_features = self.backbone.forward(x)
        vgg_features = self.cpm_stage.forward(vgg_features)
        # init stage
        init_conf, init_paf = self.init_stage.forward(vgg_features)
        conf_list.append(init_conf)
        paf_list.append(init_paf)
        # refinement stages
        for refine_stage_idx in range(1, 6):
            ref_x = tf.concat([vgg_features, conf_list[-1], paf_list[-1]], self.concat_dim)
            # FIX: attribute dispatch via getattr instead of eval() — same
            # behavior, no dynamic code execution.
            stage = getattr(self, f"refinement_stage_{refine_stage_idx}")
            ref_conf, ref_paf = stage.forward(ref_x)
            conf_list.append(ref_conf)
            paf_list.append(ref_paf)
        if is_train:
            return conf_list[-1], paf_list[-1], conf_list, paf_list
        else:
            return conf_list[-1], paf_list[-1]

    @tf.function
    def infer(self, x):
        """Inference entry point: final-stage (conf_map, paf_map)."""
        conf_map, paf_map = self.forward(x, is_train=False)
        return conf_map, paf_map

    def cal_loss(self, gt_conf, gt_paf, mask, stage_confs, stage_pafs):
        """Masked L2 loss accumulated over all stages, mean-reduced and
        normalized by batch size; also returns per-stage conf/paf losses."""
        stage_losses = []
        batch_size = gt_conf.shape[0]
        if self.concat_dim == 1:
            mask_conf = tf_repeat(mask, [1, self.n_confmaps, 1, 1])
            mask_paf = tf_repeat(mask, [1, self.n_pafmaps, 1, 1])
        elif self.concat_dim == -1:
            mask_conf = tf_repeat(mask, [1, 1, 1, self.n_confmaps])
            mask_paf = tf_repeat(mask, [1, 1, 1, self.n_pafmaps])
        loss_confs, loss_pafs = [], []
        for stage_conf, stage_paf in zip(stage_confs, stage_pafs):
            loss_conf = tf.nn.l2_loss((gt_conf - stage_conf) * mask_conf)
            loss_paf = tf.nn.l2_loss((gt_paf - stage_paf) * mask_paf)
            stage_losses.append(loss_conf)
            stage_losses.append(loss_paf)
            loss_confs.append(loss_conf)
            loss_pafs.append(loss_paf)
        pd_loss = tf.reduce_mean(stage_losses) / batch_size
        return pd_loss, loss_confs, loss_pafs

    class vgg19(Model):
        """VGG19 feature extractor (to conv4_2). Uses the pretrained
        TensorLayer model when available; a pretrained model only exists in
        channels_last, so channels_first inputs are transposed around it."""

        def __init__(self, in_channels=3, data_format="channels_first", pretrained=True):
            super().__init__()
            self.data_format = data_format
            self.pretrained = pretrained
            self.transpose = False
            self.out_channels = 512
            # BUG FIX: the original compared against the misspelled
            # "channel_last", so channels_last users fell into the transpose
            # branch and had their NHWC input wrongly permuted.
            if self.data_format == "channels_last":
                self.main_block = tl.models.vgg19(pretrained=self.pretrained, end_with="conv4_2")
            else:
                if self.pretrained:
                    print("only channels_last pretrained vgg19 available, adding transpose")
                    self.main_block = tl.models.vgg19(pretrained=self.pretrained, end_with="conv4_2")
                    self.transpose = True
                else:
                    self.main_block = layers.LayerList([
                        self.conv_block(n_filter=64, in_channels=3, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu),
                        self.conv_block(n_filter=64, in_channels=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu),
                        MaxPool2d(filter_size=(2, 2), strides=(2, 2), data_format=self.data_format),
                        self.conv_block(n_filter=128, in_channels=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu),
                        self.conv_block(n_filter=128, in_channels=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu),
                        MaxPool2d(filter_size=(2, 2), strides=(2, 2), data_format=self.data_format),
                        self.conv_block(n_filter=256, in_channels=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu),
                        self.conv_block(n_filter=256, in_channels=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu),
                        self.conv_block(n_filter=256, in_channels=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu),
                        self.conv_block(n_filter=256, in_channels=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu),
                        MaxPool2d(filter_size=(2, 2), strides=(2, 2), data_format=self.data_format),
                        self.conv_block(n_filter=512, in_channels=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu),
                        self.conv_block(n_filter=512, in_channels=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu)
                    ])

        def conv_block(self, n_filter=32, in_channels=3, filter_size=(3, 3), strides=(1, 1), act=None, padding="SAME"):
            """Single Conv2d with this module's data_format."""
            return Conv2d(n_filter=n_filter, in_channels=in_channels, filter_size=filter_size,
                          strides=strides, act=act, data_format=self.data_format, padding=padding)

        def forward(self, x):
            if self.transpose:
                # pretrained weights are channels_last: NCHW -> NHWC
                x = tf.transpose(x, [0, 2, 3, 1])
            x = self.main_block.forward(x)
            if self.transpose:
                # back to NCHW
                x = tf.transpose(x, [0, 3, 1, 2])
            return x

    class Init_stage(Model):
        """First prediction stage operating on the CPM features."""

        def __init__(self, n_confmaps=19, n_pafmaps=38, in_channels=128, data_format="channels_first"):
            super().__init__()
            self.n_confmaps = n_confmaps
            self.n_pafmaps = n_pafmaps
            self.in_channels = in_channels
            self.data_format = data_format
            self.conf_block = self._make_branch(self.n_confmaps)
            self.paf_block = self._make_branch(self.n_pafmaps)

        def _make_branch(self, out_maps):
            """Three 3x3 and one 1x1 ReLU convs, then a linear 1x1 projection.

            BUG FIX: the final conv previously used act=tf.nn.relu, which
            clamps predictions to >= 0; PAF components are signed, and the
            sibling Init_stage elsewhere in this file uses act=None on its
            output layer.
            """
            def conv(n_filter, in_channels, filter_size, act):
                return Conv2d(n_filter=n_filter, in_channels=in_channels, filter_size=filter_size,
                              strides=(1, 1), padding="SAME", act=act, W_init=initial_w,
                              b_init=initial_b, data_format=self.data_format)
            return layers.LayerList([
                conv(128, self.in_channels, (3, 3), tf.nn.relu),
                conv(128, 128, (3, 3), tf.nn.relu),
                conv(128, 128, (3, 3), tf.nn.relu),
                conv(512, 128, (1, 1), tf.nn.relu),
                conv(out_maps, 512, (1, 1), None),
            ])

        def forward(self, x):
            """Return (conf_map, paf_map) for the shared input features."""
            conf_map = self.conf_block.forward(x)
            paf_map = self.paf_block.forward(x)
            return conf_map, paf_map

    class Refinement_stage(Model):
        """Refinement stage: consumes [features, conf, paf] concatenated along
        the channel axis and emits refined conf and PAF maps."""

        def __init__(self, n_confmaps=19, n_pafmaps=38, in_channels=185, data_format="channels_first"):
            super().__init__()
            self.n_confmaps = n_confmaps
            self.n_pafmaps = n_pafmaps
            self.in_channels = in_channels
            self.data_format = data_format
            self.conf_block = self._make_branch(self.n_confmaps)
            self.paf_block = self._make_branch(self.n_pafmaps)

        def _make_branch(self, out_maps):
            """Five 7x7 ReLU convs, one 1x1 ReLU conv, then a linear 1x1
            projection to `out_maps` channels.

            BUG FIX: the final conv previously applied tf.nn.relu, which
            clamps the signed PAF regression outputs to >= 0; the in-file
            convention (see the top-level Init_stage) is act=None on output
            layers.
            """
            def conv(n_filter, in_channels, filter_size, act):
                return Conv2d(n_filter=n_filter, in_channels=in_channels, filter_size=filter_size,
                              strides=(1, 1), padding="SAME", act=act, W_init=initial_w,
                              b_init=initial_b, data_format=self.data_format)
            return layers.LayerList([
                conv(128, self.in_channels, (7, 7), tf.nn.relu),
                conv(128, 128, (7, 7), tf.nn.relu),
                conv(128, 128, (7, 7), tf.nn.relu),
                conv(128, 128, (7, 7), tf.nn.relu),
                conv(128, 128, (7, 7), tf.nn.relu),
                conv(128, 128, (1, 1), tf.nn.relu),
                conv(out_maps, 128, (1, 1), None),
            ])

        def forward(self, x):
            """Return (conf_map, paf_map) for the concatenated stage input."""
            conf_map = self.conf_block.forward(x)
            paf_map = self.paf_block.forward(x)
            return conf_map, paf_map
class OpenPose(Model):
    """OpenPose network: shared backbone + CPM squeeze + init stage + 5 refinement stages.

    Every stage emits two tensors: a confidence map (one channel per keypoint)
    and a part-affinity field (two channels per limb). Each refinement stage
    re-consumes the backbone features concatenated with the previous stage's
    two outputs along the channel axis.
    """

    def __init__(self, parts=CocoPart, limbs=CocoLimb, colors=None, n_pos=19, n_limbs=19, num_channels=128,
                 hin=368, win=368, hout=46, wout=46, backbone=None, pretraining=False,
                 data_format="channels_first"):
        super().__init__()
        self.num_channels = num_channels
        self.parts = parts
        self.limbs = limbs
        self.n_pos = n_pos
        self.n_limbs = n_limbs
        self.colors = colors
        self.n_confmaps = n_pos
        # each limb contributes an (x, y) vector field -> two channels per limb
        self.n_pafmaps = 2 * n_limbs
        self.hin = hin
        self.win = win
        self.hout = hout
        self.wout = wout
        self.data_format = data_format
        # channel axis used when stacking backbone features with stage outputs
        self.concat_dim = 1 if self.data_format == "channels_first" else -1
        # backbone configure (default: VGG-19 at stride 8)
        if backbone is None:
            self.backbone = vgg19_backbone(scale_size=8, pretraining=pretraining, data_format=self.data_format)
        else:
            self.backbone = backbone(scale_size=8, pretraining=pretraining, data_format=self.data_format)
        # CPM adaptation: squeeze backbone features down to 128 channels
        self.cpm_stage = LayerList([
            Conv2d(n_filter=256, in_channels=self.backbone.out_channels, filter_size=(3, 3), strides=(1, 1),
                   padding="SAME", act=tf.nn.relu, data_format=self.data_format),
            Conv2d(n_filter=128, in_channels=256, filter_size=(3, 3), strides=(1, 1),
                   padding="SAME", act=tf.nn.relu, data_format=self.data_format)
        ])
        # init stage
        self.init_stage = self.Init_stage(n_confmaps=self.n_confmaps, n_pafmaps=self.n_pafmaps,
                                          in_channels=128, data_format=self.data_format)
        # five refinement stages; each sees backbone features + previous conf + previous paf
        refine_in_channels = self.n_confmaps + self.n_pafmaps + 128
        self.refinement_stage_1 = self.Refinement_stage(n_confmaps=self.n_confmaps, n_pafmaps=self.n_pafmaps,
                                                        in_channels=refine_in_channels,
                                                        data_format=self.data_format)
        self.refinement_stage_2 = self.Refinement_stage(n_confmaps=self.n_confmaps, n_pafmaps=self.n_pafmaps,
                                                        in_channels=refine_in_channels,
                                                        data_format=self.data_format)
        self.refinement_stage_3 = self.Refinement_stage(n_confmaps=self.n_confmaps, n_pafmaps=self.n_pafmaps,
                                                        in_channels=refine_in_channels,
                                                        data_format=self.data_format)
        self.refinement_stage_4 = self.Refinement_stage(n_confmaps=self.n_confmaps, n_pafmaps=self.n_pafmaps,
                                                        in_channels=refine_in_channels,
                                                        data_format=self.data_format)
        self.refinement_stage_5 = self.Refinement_stage(n_confmaps=self.n_confmaps, n_pafmaps=self.n_pafmaps,
                                                        in_channels=refine_in_channels,
                                                        data_format=self.data_format)

    @tf.function
    def forward(self, x, is_train=False, stage_num=5, domainadapt=False):
        """Run backbone + all stages.

        Returns (conf, paf) at inference; with is_train also the per-stage
        lists; with domainadapt additionally the backbone features.
        """
        conf_list = []
        paf_list = []
        # backbone feature extract
        backbone_features = self.backbone.forward(x)
        backbone_features = self.cpm_stage.forward(backbone_features)
        # init stage
        init_conf, init_paf = self.init_stage.forward(backbone_features)
        conf_list.append(init_conf)
        paf_list.append(init_paf)
        # refinement stages
        for refine_stage_idx in range(1, stage_num + 1):
            ref_x = tf.concat([backbone_features, conf_list[-1], paf_list[-1]], self.concat_dim)
            # fixed: attribute lookup via getattr instead of eval() on a format string
            stage = getattr(self, f"refinement_stage_{refine_stage_idx}")
            ref_conf, ref_paf = stage.forward(ref_x)
            conf_list.append(ref_conf)
            paf_list.append(ref_paf)
        if domainadapt:
            return conf_list[-1], paf_list[-1], conf_list, paf_list, backbone_features
        if is_train:
            return conf_list[-1], paf_list[-1], conf_list, paf_list
        return conf_list[-1], paf_list[-1]

    @tf.function(experimental_relax_shapes=True)
    def infer(self, x, stage_num=5):
        """Inference entry point: final confidence map and PAF map only."""
        conf_map, paf_map = self.forward(x, is_train=False, stage_num=stage_num)
        return conf_map, paf_map

    def cal_loss(self, gt_conf, gt_paf, mask, stage_confs, stage_pafs):
        """Masked L2 loss summed over all stages.

        Returns (mean stage loss / batch_size, per-stage conf losses,
        per-stage paf losses).
        """
        stage_losses = []
        batch_size = gt_conf.shape[0]
        # broadcast the single-channel mask over every map channel
        if self.concat_dim == 1:
            mask_conf = tf_repeat(mask, [1, self.n_confmaps, 1, 1])
            mask_paf = tf_repeat(mask, [1, self.n_pafmaps, 1, 1])
        elif self.concat_dim == -1:
            mask_conf = tf_repeat(mask, [1, 1, 1, self.n_confmaps])
            mask_paf = tf_repeat(mask, [1, 1, 1, self.n_pafmaps])
        loss_confs, loss_pafs = [], []
        for stage_conf, stage_paf in zip(stage_confs, stage_pafs):
            loss_conf = tf.nn.l2_loss((gt_conf - stage_conf) * mask_conf)
            loss_paf = tf.nn.l2_loss((gt_paf - stage_paf) * mask_paf)
            stage_losses.append(loss_conf)
            stage_losses.append(loss_paf)
            loss_confs.append(loss_conf)
            loss_pafs.append(loss_paf)
        pd_loss = tf.reduce_mean(stage_losses) / batch_size
        return pd_loss, loss_confs, loss_pafs

    class Init_stage(Model):
        """First prediction stage: two parallel 3x3-conv towers over CPM features."""

        def __init__(self, n_confmaps=19, n_pafmaps=38, in_channels=128, data_format="channels_first"):
            super().__init__()
            self.n_confmaps = n_confmaps
            self.n_pafmaps = n_pafmaps
            self.in_channels = in_channels
            self.data_format = data_format
            self.conf_block = self._build_tower(self.n_confmaps)
            self.paf_block = self._build_tower(self.n_pafmaps)

        def _build_tower(self, n_out):
            # 3 x (3x3, 128ch) -> (1x1, 512ch) -> (1x1, n_out); every conv is
            # linear (act=None) and followed by a channel-wise PReLU.
            specs = [(128, self.in_channels, (3, 3)),
                     (128, 128, (3, 3)),
                     (128, 128, (3, 3)),
                     (512, 128, (1, 1)),
                     (n_out, 512, (1, 1))]
            tower = []
            for n_filter, in_ch, fsize in specs:
                tower.append(Conv2d(n_filter=n_filter, in_channels=in_ch, filter_size=fsize,
                                    strides=(1, 1), padding="SAME", act=None,
                                    W_init=initial_w, b_init=initial_b, data_format=self.data_format))
                tower.append(tl.layers.PRelu(in_channels=n_filter))
            return layers.LayerList(tower)

        def forward(self, x):
            conf_map = self.conf_block.forward(x)
            paf_map = self.paf_block.forward(x)
            return conf_map, paf_map

    class Refinement_stage(Model):
        """Refinement stage: two parallel 7x7-conv towers over concatenated features."""

        def __init__(self, n_confmaps=19, n_pafmaps=38, in_channels=185, data_format="channels_first"):
            super().__init__()
            self.n_confmaps = n_confmaps
            self.n_pafmaps = n_pafmaps
            self.in_channels = in_channels
            self.data_format = data_format
            self.conf_block = self._build_tower(self.n_confmaps)
            self.paf_block = self._build_tower(self.n_pafmaps)

        def _build_tower(self, n_out):
            # 5 x (7x7, 128ch) -> (1x1, 128ch) -> (1x1, n_out); every conv is
            # linear (act=None) and followed by a channel-wise PReLU.
            specs = [(128, self.in_channels, (7, 7)),
                     (128, 128, (7, 7)),
                     (128, 128, (7, 7)),
                     (128, 128, (7, 7)),
                     (128, 128, (7, 7)),
                     (128, 128, (1, 1)),
                     (n_out, 128, (1, 1))]
            tower = []
            for n_filter, in_ch, fsize in specs:
                tower.append(Conv2d(n_filter=n_filter, in_channels=in_ch, filter_size=fsize,
                                    strides=(1, 1), padding="SAME", act=None,
                                    W_init=initial_w, b_init=initial_b, data_format=self.data_format))
                tower.append(tl.layers.PRelu(in_channels=n_filter))
            return layers.LayerList(tower)

        def forward(self, x):
            conf_map = self.conf_block.forward(x)
            paf_map = self.paf_block.forward(x)
            return conf_map, paf_map
class PoseProposal(Model):
    """Pose Proposal Network: backbone + two 512-channel conv blocks + 1x1 output conv.

    The output tensor packs, per grid cell: K keypoint confidences, K IoU
    predictions, K each of x/y/w/h box parameters, and a wnei*hnei limb
    neighborhood map per limb class.
    """

    def __init__(self, parts=CocoPart, limbs=CocoLimb, colors=None, K_size=18, L_size=17, win=384, hin=384,
                 wout=12, hout=12, wnei=9, hnei=9, lmd_rsp=0.25, lmd_iou=1, lmd_coor=5, lmd_size=5,
                 lmd_limb=0.5, backbone=None, pretraining=False, data_format="channels_first"):
        super().__init__()
        # construct params
        self.parts = parts
        self.limbs = limbs
        self.colors = colors
        self.K = K_size  # number of keypoint classes
        self.L = L_size  # number of limb classes
        self.win = win
        self.hin = hin
        self.wout = wout
        self.hout = hout
        self.hnei = hnei
        self.wnei = wnei
        self.n_pos = K_size
        # loss weights
        self.lmd_rsp = lmd_rsp
        self.lmd_iou = lmd_iou
        self.lmd_coor = lmd_coor
        self.lmd_size = lmd_size
        self.lmd_limb = lmd_limb
        self.data_format = data_format
        # 6 scalar maps per keypoint + one wnei*hnei neighborhood map per limb
        self.output_dim = 6 * self.K + self.hnei * self.wnei * self.L
        # construct networks (default backbone: ResNet-18 at stride 32)
        if backbone is None:
            self.backbone = Resnet18_backbone(scale_size=32, pretraining=pretraining, data_format=data_format)
        else:
            self.backbone = backbone(scale_size=32, pretraining=pretraining, data_format=self.data_format)
        self.add_layer_1 = LayerList([
            Conv2d(n_filter=512, in_channels=self.backbone.out_channels, filter_size=(3, 3), strides=(1, 1),
                   data_format=self.data_format, name="add_block_1_conv_1"),
            BatchNorm2d(decay=0.9, act=lambda x: tl.act.leaky_relu(x, alpha=0.1), is_train=True,
                        num_features=512, data_format=self.data_format, name="add_block_1_bn_1"),
        ], name="add_block_1")
        self.add_layer_2 = LayerList([
            Conv2d(n_filter=512, in_channels=512, filter_size=(3, 3), strides=(1, 1),
                   data_format=self.data_format, name="add_block_2_conv_1"),
            BatchNorm2d(decay=0.9, act=lambda x: tl.act.leaky_relu(x, alpha=0.1), is_train=True,
                        num_features=512, data_format=self.data_format, name="add_block_2_bn_1")
        ], name="add_block_2")
        self.add_layer_3 = Conv2d(n_filter=self.output_dim, in_channels=512, filter_size=(1, 1),
                                  strides=(1, 1), data_format=self.data_format, name="add_block_3_conv_1")

    @tf.function
    def forward(self, x, is_train=False, ret_backbone=False):
        """Predict per-cell keypoint/limb maps; returns a dict of named tensors.

        At inference (is_train=False) box coordinates are restored to pixels.
        """
        backbone_features = self.backbone.forward(x)
        x = self.add_layer_1.forward(backbone_features)
        x = self.add_layer_2.forward(x)
        x = self.add_layer_3.forward(x)
        x = tf.nn.sigmoid(x)
        # channel layout: [K conf | K iou | K x | K y | K w | K h | wnei*hnei*L limb]
        pc = x[:, 0:self.K, :, :]
        pi = x[:, self.K:2 * self.K, :, :]
        px = x[:, 2 * self.K:3 * self.K, :, :]
        py = x[:, 3 * self.K:4 * self.K, :, :]
        pw = x[:, 4 * self.K:5 * self.K, :, :]
        ph = x[:, 5 * self.K:6 * self.K, :, :]
        pe = tf.reshape(x[:, 6 * self.K:, :, :],
                        [-1, self.L, self.wnei, self.hnei, self.wout, self.hout])
        if not is_train:
            px, py, pw, ph = self.restore_coor(px, py, pw, ph)
        # construct predict_x
        predict_x = {"c": pc, "x": px, "y": py, "w": pw, "h": ph, "i": pi, "e": pe}
        if ret_backbone:
            predict_x["backbone_features"] = backbone_features
        return predict_x

    @tf.function
    def infer(self, x):
        """Inference entry point; returns the unpacked prediction tensors."""
        predict_x = self.forward(x, is_train=False)
        pc, pi = predict_x["c"], predict_x["i"]
        px, py = predict_x["x"], predict_x["y"]
        pw, ph, pe = predict_x["w"], predict_x["h"], predict_x["e"]
        return pc, pi, px, py, pw, ph, pe

    def restore_coor(self, x, y, w, h):
        """Convert grid-relative box parameters to input-image pixel coordinates."""
        grid_size_x = self.win / self.wout
        grid_size_y = self.hin / self.hout
        grid_x, grid_y = tf.meshgrid(np.arange(self.wout).astype(np.float32),
                                     np.arange(self.hout).astype(np.float32))
        rx = (x + grid_x) * grid_size_x
        ry = (y + grid_y) * grid_size_y
        rw = w * self.win
        rh = h * self.hin
        return rx, ry, rw, rh

    def cal_iou(self, bbx1, bbx2):
        """IoU between two center-format boxes; input x, y are the box centers."""
        x1, y1, w1, h1 = bbx1
        x2, y2, w2, h2 = bbx2
        area1 = w1 * h1
        area2 = w2 * h2
        inter_x = tf.nn.relu(tf.minimum(x1 + w1 / 2, x2 + w2 / 2) - tf.maximum(x1 - w1 / 2, x2 - w2 / 2))
        inter_y = tf.nn.relu(tf.minimum(y1 + h1 / 2, y2 + h2 / 2) - tf.maximum(y1 - h1 / 2, y2 - h2 / 2))
        inter_area = inter_x * inter_y
        # epsilon guards against divide-by-zero for degenerate boxes
        union_area = area1 + area2 - inter_area + 1e-6
        return inter_area / union_area

    def cal_loss(self, predict_x, target_x, metric_manager: MetricManager, mask=None, eps=1e-6):
        """Weighted PPN loss (response + iou + coord + size + limb + L2 reg).

        Logs each component to metric_manager and returns the total loss.
        """
        # predictions
        pc, px, py, pw, ph = predict_x["c"], predict_x["x"], predict_x["y"], predict_x["w"], predict_x["h"]
        pi, pe = predict_x["i"], predict_x["e"]
        # ground truth
        gc, gx, gy, gw, gh = target_x["c"], target_x["x"], target_x["y"], target_x["w"], target_x["h"]
        # fixed: ge previously read predict_x["e"], which made loss_limb identically zero
        ge_mask, ge = target_x["e_mask"], target_x["e"]
        # restore both to pixel coordinates before computing the IoU target
        rgx, rgy, rgw, rgh = self.restore_coor(gx, gy, gw, gh)
        rpx, rpy, rpw, rph = self.restore_coor(px, py, pw, ph)
        ti = self.cal_iou((rgx, rgy, rgw, rgh), (rpx, rpy, rpw, rph))
        # soft masks: keep a tiny weight on negative cells
        mask_point = tf.minimum(gc + tf.where(gc < 0.5, 0.00001, 0.0), 1)
        mask_edge = tf.minimum(ge_mask + tf.where(ge_mask < 0.5, 0.00001, 0.0), 1)
        half = tf.where(gc < 0.5, 0.5, 0.0)
        loss_rsp = self.lmd_rsp * tf.reduce_mean(tf.reduce_sum((gc - pc)**2, axis=[1, 2, 3]))
        loss_iou = self.lmd_iou * tf.reduce_mean(tf.reduce_sum(gc * ((ti - pi)**2), axis=[1, 2, 3]))
        loss_coor = self.lmd_coor * tf.reduce_mean(
            tf.reduce_sum(mask_point * ((gx - px - half)**2 + (gy - py - half)**2), axis=[1, 2, 3]))
        loss_size = self.lmd_size * tf.reduce_mean(
            tf.reduce_sum(mask_point * ((tf.sqrt(gw + eps) - tf.sqrt(pw + eps))**2 +
                                        (tf.sqrt(gh + eps) - tf.sqrt(ph + eps))**2), axis=[1, 2, 3]))
        loss_limb = self.lmd_limb * tf.reduce_mean(
            tf.reduce_sum(mask_edge * ((ge - pe)**2), axis=[1, 2, 3, 4, 5]))
        # regularize loss
        regularize_loss = regulize_loss(self, weight_decay_factor=2e-4)
        total_loss = loss_rsp + loss_iou + loss_coor + loss_size + loss_limb + regularize_loss
        metric_manager.update("model/loss_rsp", loss_rsp)
        metric_manager.update("model/loss_iou", loss_iou)
        metric_manager.update("model/loss_coor", loss_coor)
        metric_manager.update("model/loss_size", loss_size)
        metric_manager.update("model/loss_limb", loss_limb)
        metric_manager.update("model/loss_re", regularize_loss)
        metric_manager.update("model/total_loss", total_loss)
        return total_loss
class OpenPose(Model):
    """OpenPose network (dict-output variant): backbone + CPM squeeze + init stage + 5 refinement stages.

    forward() always consumes NCHW input; when data_format is channels_last
    the tensor is converted on entry and the outputs are converted back to
    NCHW before being returned.
    """

    def __init__(self, parts=CocoPart, limbs=CocoLimb, colors=None, n_pos=19, n_limbs=19, num_channels=128,
                 hin=368, win=368, hout=46, wout=46, backbone=None, pretraining=False,
                 data_format="channels_first"):
        super().__init__()
        self.num_channels = num_channels
        self.parts = parts
        self.limbs = limbs
        self.n_pos = n_pos
        self.n_limbs = n_limbs
        self.colors = colors
        self.n_confmaps = n_pos
        # each limb contributes an (x, y) vector field -> two channels per limb
        self.n_pafmaps = 2 * n_limbs
        self.hin = hin
        self.win = win
        self.hout = hout
        self.wout = wout
        self.data_format = data_format
        # channel axis used when stacking backbone features with stage outputs
        self.concat_dim = 1 if self.data_format == "channels_first" else -1
        # backbone configure (default: VGG-19 at stride 8)
        if backbone is None:
            self.backbone = vgg19_backbone(scale_size=8, pretraining=pretraining, data_format=self.data_format)
        else:
            self.backbone = backbone(scale_size=8, pretraining=pretraining, data_format=self.data_format)
        # CPM adaptation: squeeze backbone features down to 128 channels
        self.cpm_stage = LayerList([
            Conv2d(n_filter=256, in_channels=self.backbone.out_channels, filter_size=(3, 3), strides=(1, 1),
                   padding="SAME", act=tf.nn.relu, data_format=self.data_format),
            Conv2d(n_filter=128, in_channels=256, filter_size=(3, 3), strides=(1, 1),
                   padding="SAME", act=tf.nn.relu, data_format=self.data_format)
        ])
        # init stage
        self.init_stage = self.Init_stage(n_confmaps=self.n_confmaps, n_pafmaps=self.n_pafmaps,
                                          in_channels=128, data_format=self.data_format)
        # five refinement stages; each sees backbone features + previous conf + previous paf
        refine_in_channels = self.n_confmaps + self.n_pafmaps + 128
        self.refinement_stage_1 = self.Refinement_stage(n_confmaps=self.n_confmaps, n_pafmaps=self.n_pafmaps,
                                                        in_channels=refine_in_channels,
                                                        data_format=self.data_format)
        self.refinement_stage_2 = self.Refinement_stage(n_confmaps=self.n_confmaps, n_pafmaps=self.n_pafmaps,
                                                        in_channels=refine_in_channels,
                                                        data_format=self.data_format)
        self.refinement_stage_3 = self.Refinement_stage(n_confmaps=self.n_confmaps, n_pafmaps=self.n_pafmaps,
                                                        in_channels=refine_in_channels,
                                                        data_format=self.data_format)
        self.refinement_stage_4 = self.Refinement_stage(n_confmaps=self.n_confmaps, n_pafmaps=self.n_pafmaps,
                                                        in_channels=refine_in_channels,
                                                        data_format=self.data_format)
        self.refinement_stage_5 = self.Refinement_stage(n_confmaps=self.n_confmaps, n_pafmaps=self.n_pafmaps,
                                                        in_channels=refine_in_channels,
                                                        data_format=self.data_format)

    @tf.function
    def forward(self, x, is_train=False, ret_backbone=False):
        """Run all stages; returns a dict with final and per-stage maps (NCHW)."""
        # input is NCHW; convert when the network runs in channels_last mode
        if self.data_format == "channels_last":
            x = NCHW_to_NHWC(x)
        stage_num = 5
        conf_list = []
        paf_list = []
        # backbone feature extract
        backbone_features = self.backbone.forward(x)
        backbone_features = self.cpm_stage.forward(backbone_features)
        # init stage
        init_conf, init_paf = self.init_stage.forward(backbone_features)
        conf_list.append(init_conf)
        paf_list.append(init_paf)
        # refinement stages
        for refine_stage_idx in range(1, stage_num + 1):
            ref_x = tf.concat([backbone_features, conf_list[-1], paf_list[-1]], self.concat_dim)
            # fixed: attribute lookup via getattr instead of eval() on a format string
            stage = getattr(self, f"refinement_stage_{refine_stage_idx}")
            ref_conf, ref_paf = stage.forward(ref_x)
            conf_list.append(ref_conf)
            paf_list.append(ref_paf)
        # convert outputs back so callers always receive NCHW tensors
        if self.data_format == "channels_last":
            backbone_features = NHWC_to_NCHW(backbone_features)
            conf_list = [NHWC_to_NCHW(conf) for conf in conf_list]
            paf_list = [NHWC_to_NCHW(paf) for paf in paf_list]
        # construct predict_x
        predict_x = {
            "conf_map": conf_list[-1],
            "paf_map": paf_list[-1],
            "stage_confs": conf_list,
            "stage_pafs": paf_list
        }
        if ret_backbone:
            predict_x["backbone_features"] = backbone_features
        return predict_x

    @tf.function(experimental_relax_shapes=True)
    def infer(self, x):
        """Inference entry point: final confidence map and PAF map only."""
        predict_x = self.forward(x, is_train=False)
        conf_map, paf_map = predict_x["conf_map"], predict_x["paf_map"]
        return conf_map, paf_map

    def cal_loss(self, predict_x, target_x, metric_manager, mask=None):
        """Per-stage L2 loss plus weight-decay regularization; returns total loss."""
        # TODO: exclude the loss calculate from mask
        # predict maps
        stage_confs = predict_x["stage_confs"]
        stage_pafs = predict_x["stage_pafs"]
        # target maps
        gt_conf = target_x["conf_map"]
        gt_paf = target_x["paf_map"]
        stage_losses = []
        batch_size = gt_conf.shape[0]
        loss_confs, loss_pafs = [], []
        for stage_conf, stage_paf in zip(stage_confs, stage_pafs):
            loss_conf = tf.nn.l2_loss(gt_conf - stage_conf)
            loss_paf = tf.nn.l2_loss(gt_paf - stage_paf)
            stage_losses.append(loss_conf)
            stage_losses.append(loss_paf)
            loss_confs.append(loss_conf)
            loss_pafs.append(loss_paf)
        pd_loss = tf.reduce_mean(stage_losses) / batch_size
        total_loss = pd_loss
        metric_manager.update("model/conf_loss", loss_confs[-1])
        metric_manager.update("model/paf_loss", loss_pafs[-1])
        # regularize loss
        regularize_loss = regulize_loss(self, weight_decay_factor=2e-4)
        total_loss += regularize_loss
        metric_manager.update("model/loss_re", regularize_loss)
        return total_loss

    class Init_stage(Model):
        """First prediction stage: two parallel 3x3-conv towers over CPM features."""

        def __init__(self, n_confmaps=19, n_pafmaps=38, in_channels=128, data_format="channels_first"):
            super().__init__()
            self.n_confmaps = n_confmaps
            self.n_pafmaps = n_pafmaps
            self.in_channels = in_channels
            self.data_format = data_format
            self.conf_block = self._build_tower(self.n_confmaps)
            self.paf_block = self._build_tower(self.n_pafmaps)

        def _build_tower(self, n_out):
            # 3 x (3x3, 128ch) -> (1x1, 512ch) -> (1x1, n_out); every conv is
            # linear (act=None) and followed by a channel-wise PReLU.
            specs = [(128, self.in_channels, (3, 3)),
                     (128, 128, (3, 3)),
                     (128, 128, (3, 3)),
                     (512, 128, (1, 1)),
                     (n_out, 512, (1, 1))]
            tower = []
            for n_filter, in_ch, fsize in specs:
                tower.append(Conv2d(n_filter=n_filter, in_channels=in_ch, filter_size=fsize,
                                    strides=(1, 1), padding="SAME", act=None,
                                    W_init=initial_w, b_init=initial_b, data_format=self.data_format))
                tower.append(tl.layers.PRelu(in_channels=n_filter))
            return layers.LayerList(tower)

        def forward(self, x):
            conf_map = self.conf_block.forward(x)
            paf_map = self.paf_block.forward(x)
            return conf_map, paf_map

    class Refinement_stage(Model):
        """Refinement stage: two parallel 7x7-conv towers over concatenated features."""

        def __init__(self, n_confmaps=19, n_pafmaps=38, in_channels=185, data_format="channels_first"):
            super().__init__()
            self.n_confmaps = n_confmaps
            self.n_pafmaps = n_pafmaps
            self.in_channels = in_channels
            self.data_format = data_format
            self.conf_block = self._build_tower(self.n_confmaps)
            self.paf_block = self._build_tower(self.n_pafmaps)

        def _build_tower(self, n_out):
            # 5 x (7x7, 128ch) -> (1x1, 128ch) -> (1x1, n_out); every conv is
            # linear (act=None) and followed by a channel-wise PReLU.
            specs = [(128, self.in_channels, (7, 7)),
                     (128, 128, (7, 7)),
                     (128, 128, (7, 7)),
                     (128, 128, (7, 7)),
                     (128, 128, (7, 7)),
                     (128, 128, (1, 1)),
                     (n_out, 128, (1, 1))]
            tower = []
            for n_filter, in_ch, fsize in specs:
                tower.append(Conv2d(n_filter=n_filter, in_channels=in_ch, filter_size=fsize,
                                    strides=(1, 1), padding="SAME", act=None,
                                    W_init=initial_w, b_init=initial_b, data_format=self.data_format))
                tower.append(tl.layers.PRelu(in_channels=n_filter))
            return layers.LayerList(tower)

        def forward(self, x):
            conf_map = self.conf_block.forward(x)
            paf_map = self.paf_block.forward(x)
            return conf_map, paf_map
class PoseProposal(Model):
    """Pose Proposal Network (tuple-output variant) supporting both data formats.

    The output tensor packs, per grid cell: K keypoint confidences, K IoU
    predictions, K each of x/y/w/h box parameters, and a wnei*hnei limb
    neighborhood map per limb class.
    """

    def __init__(self, parts=CocoPart, limbs=CocoLimb, colors=None, K_size=18, L_size=17, win=384, hin=384,
                 wout=12, hout=12, wnei=9, hnei=9, lmd_rsp=0.25, lmd_iou=1, lmd_coor=5, lmd_size=5,
                 lmd_limb=0.5, backbone=None, pretraining=False, data_format="channels_first"):
        super().__init__()
        # construct params
        self.parts = parts
        self.limbs = limbs
        self.colors = colors
        self.K = K_size  # number of keypoint classes
        self.L = L_size  # number of limb classes
        self.win = win
        self.hin = hin
        self.wout = wout
        self.hout = hout
        self.hnei = hnei
        self.wnei = wnei
        self.n_pos = K_size
        # loss weights
        self.lmd_rsp = lmd_rsp
        self.lmd_iou = lmd_iou
        self.lmd_coor = lmd_coor
        self.lmd_size = lmd_size
        self.lmd_limb = lmd_limb
        self.data_format = data_format
        # 6 scalar maps per keypoint + one wnei*hnei neighborhood map per limb
        self.output_dim = 6 * self.K + self.hnei * self.wnei * self.L
        # construct networks (default backbone: ResNet-18 at stride 32)
        if backbone is None:
            self.backbone = Resnet18_backbone(scale_size=32, pretraining=pretraining, data_format=data_format)
        else:
            self.backbone = backbone(scale_size=32, pretraining=pretraining, data_format=self.data_format)
        self.add_layer_1 = LayerList([
            Conv2d(n_filter=512, in_channels=self.backbone.out_channels, filter_size=(3, 3), strides=(1, 1),
                   data_format=self.data_format, name="add_block_1_conv_1"),
            BatchNorm2d(decay=0.9, act=lambda x: tl.act.leaky_relu(x, alpha=0.1), is_train=True,
                        num_features=512, data_format=self.data_format, name="add_block_1_bn_1"),
        ], name="add_block_1")
        self.add_layer_2 = LayerList([
            Conv2d(n_filter=512, in_channels=512, filter_size=(3, 3), strides=(1, 1),
                   data_format=self.data_format, name="add_block_2_conv_1"),
            BatchNorm2d(decay=0.9, act=lambda x: tl.act.leaky_relu(x, alpha=0.1), is_train=True,
                        num_features=512, data_format=self.data_format, name="add_block_2_bn_1")
        ], name="add_block_2")
        self.add_layer_3 = Conv2d(n_filter=self.output_dim, in_channels=512, filter_size=(1, 1),
                                  strides=(1, 1), data_format=self.data_format, name="add_block_3_conv_1")

    @tf.function
    def forward(self, x, is_train=False, domainadapt=False):
        """Predict per-cell keypoint/limb maps; returns the unpacked tensors.

        At inference (is_train=False) box coordinates are restored to pixels;
        with domainadapt the backbone features are also returned.
        """
        backbone_features = self.backbone.forward(x)
        x = self.add_layer_1.forward(backbone_features)
        x = self.add_layer_2.forward(x)
        x = self.add_layer_3.forward(x)
        x = tf.nn.sigmoid(x)
        # channel layout: [K conf | K iou | K x | K y | K w | K h | wnei*hnei*L limb]
        if self.data_format == "channels_first":
            pc = x[:, 0:self.K, :, :]
            pi = x[:, self.K:2 * self.K, :, :]
            px = x[:, 2 * self.K:3 * self.K, :, :]
            py = x[:, 3 * self.K:4 * self.K, :, :]
            pw = x[:, 4 * self.K:5 * self.K, :, :]
            ph = x[:, 5 * self.K:6 * self.K, :, :]
            pe = tf.reshape(x[:, 6 * self.K:, :, :],
                            [-1, self.L, self.wnei, self.hnei, self.wout, self.hout])
        else:
            pc = x[:, :, :, 0:self.K]
            pi = x[:, :, :, self.K:2 * self.K]
            px = x[:, :, :, 2 * self.K:3 * self.K]
            py = x[:, :, :, 3 * self.K:4 * self.K]
            pw = x[:, :, :, 4 * self.K:5 * self.K]
            ph = x[:, :, :, 5 * self.K:6 * self.K]
            pe = tf.reshape(x[:, :, :, 6 * self.K:],
                            [-1, self.wnei, self.hnei, self.wout, self.hout, self.L])
        if not is_train:
            px, py, pw, ph = self.restore_coor(px, py, pw, ph)
        if domainadapt:
            return pc, pi, px, py, pw, ph, pe, backbone_features
        return pc, pi, px, py, pw, ph, pe

    @tf.function
    def infer(self, x):
        """Inference entry point; forwards with coordinates restored to pixels."""
        pc, pi, px, py, pw, ph, pe = self.forward(x, is_train=False)
        return pc, pi, px, py, pw, ph, pe

    def restore_coor(self, x, y, w, h):
        """Convert grid-relative box parameters to input-image pixel coordinates."""
        grid_size_x = self.win / self.wout
        grid_size_y = self.hin / self.hout
        grid_x, grid_y = tf.meshgrid(np.arange(self.wout).astype(np.float32),
                                     np.arange(self.hout).astype(np.float32))
        if self.data_format == "channels_last":
            # add a trailing channel axis so the grid broadcasts over NHWC maps
            grid_x = grid_x[:, :, np.newaxis]
            grid_y = grid_y[:, :, np.newaxis]
        rx = (x + grid_x) * grid_size_x
        ry = (y + grid_y) * grid_size_y
        rw = w * self.win
        rh = h * self.hin
        return rx, ry, rw, rh

    def cal_iou(self, bbx1, bbx2):
        """IoU between two center-format boxes; input x, y are the box centers."""
        x1, y1, w1, h1 = bbx1
        x2, y2, w2, h2 = bbx2
        area1 = w1 * h1
        area2 = w2 * h2
        inter_x = tf.nn.relu(tf.minimum(x1 + w1 / 2, x2 + w2 / 2) - tf.maximum(x1 - w1 / 2, x2 - w2 / 2))
        inter_y = tf.nn.relu(tf.minimum(y1 + h1 / 2, y2 + h2 / 2) - tf.maximum(y1 - h1 / 2, y2 - h2 / 2))
        inter_area = inter_x * inter_y
        # epsilon guards against divide-by-zero for degenerate boxes
        union_area = area1 + area2 - inter_area + 1e-6
        return inter_area / union_area

    def cal_loss(self, delta, tx, ty, tw, th, te, te_mask, pc, pi, px, py, pw, ph, pe, eps=1e-6):
        """Weighted PPN loss components; returns (rsp, iou, coor, size, limb) losses.

        t* are targets, p* are predictions; delta is the keypoint-existence map.
        """
        # restore both to pixel coordinates before computing the IoU target
        rtx, rty, rtw, rth = self.restore_coor(tx, ty, tw, th)
        rx, ry, rw, rh = self.restore_coor(px, py, pw, ph)
        ti = self.cal_iou((rtx, rty, rtw, rth), (rx, ry, rw, rh))
        # soft masks: keep a tiny weight on negative cells
        mask_point = tf.minimum(delta + tf.where(delta < 0.5, 0.00001, 0.0), 1)
        mask_edge = tf.minimum(te_mask + tf.where(te_mask < 0.5, 0.00001, 0.0), 1)
        half = tf.where(delta < 0.5, 0.5, 0.0)
        loss_rsp = self.lmd_rsp * tf.reduce_mean(
            tf.reduce_sum((delta - pc)**2, axis=[1, 2, 3]))
        loss_iou = self.lmd_iou * tf.reduce_mean(
            tf.reduce_sum(delta * ((ti - pi)**2), axis=[1, 2, 3]))
        loss_coor = self.lmd_coor * tf.reduce_mean(
            tf.reduce_sum(mask_point * ((tx - px - half)**2 + (ty - py - half)**2), axis=[1, 2, 3]))
        loss_size = self.lmd_size * tf.reduce_mean(
            tf.reduce_sum(mask_point * ((tf.sqrt(tw + eps) - tf.sqrt(pw + eps))**2 +
                                        (tf.sqrt(th + eps) - tf.sqrt(ph + eps))**2), axis=[1, 2, 3]))
        loss_limb = self.lmd_limb * tf.reduce_mean(
            tf.reduce_sum(mask_edge * ((te - pe)**2), axis=[1, 2, 3, 4, 5]))
        return loss_rsp, loss_iou, loss_coor, loss_size, loss_limb