def forward(self, input):
    # names = list(input.keys())
    input = list(input.values())

    output1 = self.output1(input[0])
    output2 = self.output2(input[1])
    output3 = self.output3(input[2])

    # up3 = F.interpolate(output3, size=[output2.size(2), output2.size(3)], mode="nearest")  # upsample by 2
    up3 = self.upsample(output3)
    if up3.value[2] != output2.value[2] or up3.value[3] != output2.value[3]:
        pad = (0, output2.value[3] - up3.value[3], 0, output2.value[2] - up3.value[2])
        up3 = F.pad(up3, pad)
    output2 = self.eltadd(output2, up3)
    output2 = self.merge2(output2)

    # up2 = F.interpolate(output2, size=[output1.size(2), output1.size(3)], mode="nearest")  # upsample by 2
    up2 = self.upsample(output2)  # upsample the merged output2 (not output3) to match output1
    if up2.value[2] != output1.value[2] or up2.value[3] != output1.value[3]:
        pad = (0, output1.value[3] - up2.value[3], 0, output1.value[2] - up2.value[2])
        up2 = F.pad(up2, pad)
    output1 = self.eltadd(output1, up2)
    output1 = self.merge1(output1)

    out = [output1, output2, output3]
    return out

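# --- Illustration only (not part of any model above or below) ----------------------
# The pad tuple built in every shape-matching branch follows the (left, right, top,
# bottom) convention, i.e. width padding comes first and only the right/bottom edges
# grow. A standalone sketch with plain PyTorch tensors; the surrounding code applies
# the same idea to flops_counter TensorSize objects whose shape lives in `.value`.
# import torch
#
# up = torch.zeros(1, 256, 74, 74)    # e.g. a 37x37 map upsampled by 2
# ref = torch.zeros(1, 256, 75, 75)   # the finer map it has to match
#
# if up.shape[2] != ref.shape[2] or up.shape[3] != ref.shape[3]:
#     pad = (0, ref.shape[3] - up.shape[3], 0, ref.shape[2] - up.shape[2])
#     up = torch.nn.functional.pad(up, pad)
#
# assert up.shape == ref.shape        # now safe to add/multiply element-wise
# ------------------------------------------------------------------------------------
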
def forward(self, x):
    loc = list()
    conf = list()

    conv1_x = self.inception1(self.conv1(x))
    conv2_x = self.inception2(self.maxpool1(conv1_x))
    conv3_x = self.inception3(self.conv3(conv2_x))
    conv4_x = self.inception4(self.maxpool2(conv3_x))
    conv5_x = self.conv5_2(self.conv5_1(conv4_x))
    conv6_x = self.conv6_2(self.conv6_1(conv5_x))

    # fpn
    conv6_x = self.latlayer6(conv6_x)
    conv5_x = self.latlayer5(conv5_x)
    conv4_x = self.latlayer4(conv4_x)
    conv3_x = self.latlayer3(conv3_x)
    conv2_x = self.latlayer2(conv2_x)
    conv1_x = self.latlayer1(conv1_x)

    conv4_x_up = self.upsample(conv4_x)
    if conv4_x_up.value[2] != conv3_x.value[2] or conv4_x_up.value[3] != conv3_x.value[3]:
        pad = (0, conv3_x.value[3] - conv4_x_up.value[3], 0, conv3_x.value[2] - conv4_x_up.value[2])
        conv4_x_up = F.pad(conv4_x_up, pad)
    conv3_x = self.smooth3(self.eltmul(conv4_x_up, conv3_x))

    conv3_x_up = self.upsample(conv3_x)  # upsample the merged conv3_x (not conv2_x) to match conv2_x
    if conv3_x_up.value[2] != conv2_x.value[2] or conv3_x_up.value[3] != conv2_x.value[3]:
        pad = (0, conv2_x.value[3] - conv3_x_up.value[3], 0, conv2_x.value[2] - conv3_x_up.value[2])
        conv3_x_up = F.pad(conv3_x_up, pad)
    conv2_x = self.smooth2(self.eltmul(conv3_x_up, conv2_x))

    conv2_x_up = self.upsample(conv2_x)
    if conv2_x_up.value[2] != conv1_x.value[2] or conv2_x_up.value[3] != conv1_x.value[3]:
        pad = (0, conv1_x.value[3] - conv2_x_up.value[3], 0, conv1_x.value[2] - conv2_x_up.value[2])
        conv2_x_up = F.pad(conv2_x_up, pad)
    conv1_x = self.smooth1(self.eltmul(conv2_x_up, conv1_x))

    sources = [conv1_x, conv2_x, conv3_x, conv4_x, conv5_x, conv6_x]

    # cpm
    sources[0] = self.cpm1(sources[0])
    sources[1] = self.cpm2(sources[1])
    sources[2] = self.cpm3(sources[2])
    sources[3] = self.cpm4(sources[3])
    sources[4] = self.cpm5(sources[4])
    sources[5] = self.cpm6(sources[5])

    # head
    featuremap_size = []
    for (x, l, c) in zip(sources, self.loc, self.conf):
        featuremap_size.append([x.size(2), x.size(3)])
        loc.append(l(x).permute(0, 2, 3, 1))
        conf.append(c(x).permute(0, 2, 3, 1))

    face_loc = flops_counter.cat([o.view(o.size(0), -1) for o in loc], 1)
    face_conf = flops_counter.cat([o.view(o.size(0), -1) for o in conf], 1)

    face_loc = face_loc.view(face_loc.size(0), -1, 4)
    face_conf = self.softmax(face_conf.view(face_conf.size(0), -1, 2))
    return face_loc, face_conf

def feature_extractor(self, x):
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.maxpool(x)

    c2 = self.layer1(x)
    c3 = self.layer2(c2)
    c4 = self.layer3(c3)
    c5 = self.layer4(c4)
    c6 = self.layer5(c5)
    c7 = self.layer6(c6)

    c2_r = self.c2_conv(c2)
    c3_r = self.c3_conv(c3)
    c4_r = self.c4_conv(c4)
    c5_r = self.c5_conv(c5)
    c6_r = self.c6_conv(c6)
    c7_r = self.c7_conv(c7)

    c5_lateral = self.c5_lateral(c5)
    p5 = self.p5_conv(c5_lateral)
    p6 = self.p6_conv(c5_lateral)
    p7 = self.p7_conv(p6)

    c4_lateral = self.c4_lateral(c4)
    c5_lateral_up = self.upsample(c5_lateral)
    if c5_lateral_up.value[2] != c4_lateral.value[2] or c5_lateral_up.value[3] != c4_lateral.value[3]:
        pad = (0, c4_lateral.value[3] - c5_lateral_up.value[3], 0, c4_lateral.value[2] - c5_lateral_up.value[2])
        c5_lateral_up = F.pad(c5_lateral_up, pad)
    sum_4 = self.eltadd(c4_lateral, c5_lateral_up)
    p4 = self.p4_conv(sum_4)

    c3_lateral = self.c3_lateral(c3)
    sum_4_up = self.upsample(sum_4)
    if sum_4_up.value[2] != c3_lateral.value[2] or sum_4_up.value[3] != c3_lateral.value[3]:
        pad = (0, c3_lateral.value[3] - sum_4_up.value[3], 0, c3_lateral.value[2] - sum_4_up.value[2])
        sum_4_up = F.pad(sum_4_up, pad)
    sum_3 = self.eltadd(sum_4_up, c3_lateral)
    p3 = self.p3_conv(sum_3)

    c2_lateral = self.c2_lateral(c2)
    sum_3_up = self.upsample(sum_3)
    if sum_3_up.value[2] != c2_lateral.value[2] or sum_3_up.value[3] != c2_lateral.value[3]:
        pad = (0, c2_lateral.value[3] - sum_3_up.value[3], 0, c2_lateral.value[2] - sum_3_up.value[2])
        sum_3_up = F.pad(sum_3_up, pad)
    sum_2 = self.eltadd(sum_3_up, c2_lateral)
    p2 = self.p2_conv(sum_2)

    return (c2_r, c3_r, c4_r, c5_r, c6_r, c7_r), (p2, p3, p4, p5, p6, p7)

def forward(self, x):
    outs = []

    conv3_3 = self.conv3(self.conv2(self.conv1(x)))
    conv4_3 = self.conv4(conv3_3)
    conv5_3 = self.conv5(conv4_3)

    # M3
    pool6 = self.pool6(conv5_3)
    m3_out = self.m3(pool6)
    outs.append(m3_out)

    # M2
    m2_out = self.m2(conv5_3)
    outs.append(m2_out)

    # shared by M1 and M0
    conv4_128 = self.conv4_128(conv4_3)

    # M1
    conv5_128 = self.conv5_128(conv5_3)
    conv5_128_up = self.conv5_128_up(conv5_128)
    # pad conv5_128_up so that it has the same spatial size as conv4_128
    if conv4_128.value[2] != conv5_128_up.value[2] or conv4_128.value[3] != conv5_128_up.value[3]:
        pad = (0, conv4_128.value[3] - conv5_128_up.value[3], 0, conv4_128.value[2] - conv5_128_up.value[2])
        conv5_128_up = F.pad(conv5_128_up, pad)
    conv4_fuse = self.conv4_fuse(conv4_128, conv5_128_up)
    conv4_fuse_final = self.conv4_fuse_final(conv4_fuse)
    m1_out = self.m1(conv4_fuse_final)
    outs.append(m1_out)

    # M0
    conv3_128 = self.conv3_128(conv3_3)
    conv4_128_up = self.conv4_128_up(conv4_128)
    # pad conv4_128_up so that it has the same spatial size as conv3_128
    if conv3_128.value[2] != conv4_128_up.value[2] or conv3_128.value[3] != conv4_128_up.value[3]:
        pad = (0, conv3_128.value[3] - conv4_128_up.value[3], 0, conv3_128.value[2] - conv4_128_up.value[2])
        conv4_128_up = F.pad(conv4_128_up, pad)
    conv3_fuse = self.conv3_fuse(conv3_128, conv4_128_up)
    conv3_fuse_final = self.conv3_fuse_final(conv3_fuse)
    m0_out = self.m0(conv3_fuse_final)
    outs.append(m0_out)

    # detection heads
    loc = []
    conf = []
    # reverse outs so that the order of feature maps matches the order of heads
    for i, o in enumerate(outs[::-1]):
        loc.append(self.bbox_head[i](o))
        cls_score = self.cls_head[i](o)
        cls_score = cls_score.view(cls_score.size(0), -1, 4)
        cls_score = self.softmax(cls_score)
        conf.append(cls_score)

    return loc, conf

def forward(self, x):
    conv = self.conv(x)
    pool = self.pool(conv)

    res2a = self.res2a(pool)
    res2b = self.res2b(res2a)
    res2c = self.res2c(res2b)

    res3a = self.res3a(res2c)
    res3b1 = self.res3b1(res3a)
    res3b2 = self.res3b2(res3b1)
    res3b3 = self.res3b3(res3b2)

    res4a = self.res4a(res3b3)
    res4bX = res4a
    for i in range(0, 22):
        res4bX = self.res4bX[i](res4bX)

    # Detection Head
    score_res4 = self.score_res4(res4bX)
    score4 = self.score4(score_res4)
    score_res3 = self.score_res3(res3b3)
    if score4.value[2] != score_res3.value[2] or score4.value[3] != score_res3.value[3]:
        pads = (0, score_res3.value[3] - score4.value[3], 0, score_res3.value[2] - score4.value[2])
        score4 = F.pad(score4, pads)
    score_final = self.eltadd(score4, score_res3)

    return score_final

def upsample_add(seq, source, target, up_handle, add_handle):
    """Upsample `source` via `up_handle`, pad it to `target`'s spatial size,
    apply `seq` to the result, and combine it with `target` via `add_handle`."""
    up = up_handle(source)
    if up.value[2] != target.value[2] or up.value[3] != target.value[3]:
        pad = (0, target.value[3] - up.value[3], 0, target.value[2] - up.value[2])
        up = F.pad(up, pad)
    dst = add_handle(seq(up), target)
    return dst

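# --- Illustration only --------------------------------------------------------------
# A plain-tensor sketch of how upsample_add composes its handles: upsample the coarse
# source, pad it to the target's size, smooth it with `seq`, then combine. The modules
# and names below are stand-ins (assumptions), not handles from the models in this
# file; the real helper operates on flops_counter TensorSize objects, so it is not
# called here directly.
# import torch
# import torch.nn as nn
#
# def upsample_add_demo(seq, source, target, up_handle, add_handle):
#     up = up_handle(source)
#     if up.shape[2] != target.shape[2] or up.shape[3] != target.shape[3]:
#         pad = (0, target.shape[3] - up.shape[3], 0, target.shape[2] - up.shape[2])
#         up = torch.nn.functional.pad(up, pad)
#     return add_handle(seq(up), target)
#
# smooth = nn.Conv2d(256, 256, kernel_size=3, padding=1)   # plays `seq`
# upsample = nn.Upsample(scale_factor=2, mode='nearest')   # plays `up_handle`
# eltadd = lambda a, b: a + b                              # plays `add_handle`
#
# coarse = torch.zeros(1, 256, 37, 37)
# fine = torch.zeros(1, 256, 75, 75)                       # odd size forces the padding branch
# fused = upsample_add_demo(smooth, coarse, fine, upsample, eltadd)
# assert fused.shape == fine.shape
# --------------------------------------------------------------------------------------
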
def forward(self, x):
    out = x

    # get conv4_3
    for k in range(23):
        out = self.vgg16[k](out)
    conv4_3 = out

    # get conv5_3
    for k in range(23, 30):
        out = self.vgg16[k](out)
    conv5_3 = out

    # get pool6 (it's actually pool5)
    pool6 = self.vgg16[30](out)

    # SSH - M3
    M3_output = self.M3(pool6)
    # SSH - M3 - bbox_pred
    M3_bbox_pred = self.M3_bbox_pred(M3_output)
    # SSH - M3 - cls_score
    M3_cls_score = self.M3_cls_score(M3_output)
    M3_cls_score = M3_cls_score.view(M3_cls_score.value[0], 2, -1, M3_cls_score.value[-1])
    M3_cls_prob = self.M3_cls_score_softmax(M3_cls_score)
    M3_cls_prob = M3_cls_prob.view(M3_cls_prob.value[0], 4, -1, M3_cls_prob.value[-1])

    # SSH - M2
    M2_output = self.M2(conv5_3)
    # SSH - M2 - bbox_pred
    M2_bbox_pred = self.M2_bbox_pred(M2_output)
    # SSH - M2 - cls_score
    M2_cls_score = self.M2_cls_score(M2_output)
    M2_cls_score = M2_cls_score.view(M2_cls_score.value[0], 2, -1, M2_cls_score.value[-1])
    M2_cls_prob = self.M2_cls_score_softmax(M2_cls_score)
    M2_cls_prob = M2_cls_prob.view(M2_cls_prob.value[0], 4, -1, M2_cls_prob.value[-1])

    # SSH - M1
    conv4_128 = self.conv4_128(conv4_3)
    conv4_128 = self.conv4_128_relu(conv4_128)
    conv5_128 = self.conv5_128(conv5_3)
    conv5_128 = self.conv5_128_relu(conv5_128)
    conv5_128_up = self.conv5_128_up(conv5_128)
    if conv5_128_up.value[2] != conv4_128.value[2] or conv5_128_up.value[3] != conv4_128.value[3]:
        pads = (0, conv4_128.value[3] - conv5_128_up.value[3], 0, conv4_128.value[2] - conv5_128_up.value[2])
        conv5_128_up = F.pad(conv5_128_up, pads)
    conv4_fuse = self.eltadd(conv4_128, conv5_128_up)
    conv4_fuse_final = self.conv4_fuse_final(conv4_fuse)
    conv4_fuse_final = self.conv4_fuse_final_relu(conv4_fuse_final)
    M1_output = self.M1(conv4_fuse_final)
    # SSH - M1 - bbox_pred
    M1_bbox_pred = self.M1_bbox_pred(M1_output)
    # SSH - M1 - cls_score
    M1_cls_score = self.M1_cls_score(M1_output)
    M1_cls_score = M1_cls_score.view(M1_cls_score.value[0], 2, -1, M1_cls_score.value[-1])
    M1_cls_prob = self.M1_cls_score_softmax(M1_cls_score)
    M1_cls_prob = M1_cls_prob.view(M1_cls_prob.value[0], 4, -1, M1_cls_prob.value[-1])

    return (M1_bbox_pred, M2_bbox_pred, M3_bbox_pred), (M1_cls_prob, M2_cls_prob, M3_cls_prob)

def forward(self, up_from, up_to):
    conv1 = self.conv1(up_from)
    conv1 = self.conv1_relu(conv1)
    upsampling = self.upsampling(conv1)

    conv2 = self.conv2(up_to)
    conv2 = self.conv2_relu(conv2)  # apply the ReLU to conv2, not to up_to again

    if upsampling.value[2] != conv2.value[2] or upsampling.value[3] != conv2.value[3]:
        # upsampling = upsampling[:, :, 0:conv2.size(2), 0:conv2.size(3)]
        pads = (0, conv2.value[3] - upsampling.value[3], 0, conv2.value[2] - upsampling.value[2])
        upsampling = F.pad(upsampling, pads)

    fuse = self.eltmul(upsampling, conv2)
    return fuse

def forward(self, x):
    x = self.conv1(x)
    x = self.conv2(x)
    x = self.conv3(x)
    conv4_3 = self.conv4(x)

    # branch 1
    conv5_3 = self.conv5(self.pool4(conv4_3))
    conv5_256 = self.conv5_256(conv5_3)
    conv5_256_up = self.conv5_256_up(conv5_256)

    # branch 2
    conv4_256 = self.conv4_256(conv4_3)

    # fuse(conv5_256_up, conv4_256)
    if conv5_256_up.value[2] != conv4_256.value[2] or conv5_256_up.value[3] != conv4_256.value[3]:
        pad = (0, conv4_256.value[3] - conv5_256_up.value[3], 0, conv4_256.value[2] - conv5_256_up.value[2])
        conv5_256_up = F.pad(conv5_256_up, pad)
    conv4_fuse = flops_counter.cat([conv5_256_up, conv4_256], 1)
    conv4_fuse_final_dim_red = self.conv4_fuse_final_dim_red(self.conv4_fuse_final(conv4_fuse))

    head1_f = self.head1(conv4_fuse_final_dim_red)
    head2_f = self.head2(conv4_fuse_final_dim_red)
    head4_f = self.head4(conv4_fuse_final_dim_red)
    outs = [head1_f, head2_f, head4_f]

    loc = []
    conf = []
    for i, o in enumerate(outs):
        loc += [self.bbox_head[i](o)]
        conf += [self.cls_head[i](o)]
    loc_cat = flops_counter.cat(loc, 1)
    conf_cat = self.softmax(flops_counter.cat(conf, 2))

    return loc_cat, conf_cat

def forward(self, x):
    loc = list()
    conf = list()

    ######
    # Backbone
    ######
    conv3_3_x = self.layer1(x)
    conv4_3_x = self.layer2(conv3_3_x)
    conv5_3_x = self.layer3(conv4_3_x)
    fc7_x = self.layer4(conv5_3_x)
    conv6_2_x = self.layer5(fc7_x)
    conv7_2_x = self.layer6(conv6_2_x)

    ######
    # dsfd specific layers
    ######
    # fpn
    lfpn3_fc7_x_up = self.upsample(self.latlayer3(fc7_x))
    lfpn3_conv5_3_x = self.smooth3(conv5_3_x)
    if lfpn3_fc7_x_up.value[2] != lfpn3_conv5_3_x.value[2] or lfpn3_fc7_x_up.value[3] != lfpn3_conv5_3_x.value[3]:
        pad = (0, lfpn3_conv5_3_x.value[3] - lfpn3_fc7_x_up.value[3], 0, lfpn3_conv5_3_x.value[2] - lfpn3_fc7_x_up.value[2])
        lfpn3_fc7_x_up = F.pad(lfpn3_fc7_x_up, pad)
    lfpn3 = self.eltmul(lfpn3_fc7_x_up, lfpn3_conv5_3_x)

    lfpn2_lfpn3_up = self.upsample(self.latlayer2(lfpn3))
    lfpn2_conv4_3_x = self.smooth2(conv4_3_x)
    if lfpn2_lfpn3_up.value[2] != lfpn2_conv4_3_x.value[2] or lfpn2_lfpn3_up.value[3] != lfpn2_conv4_3_x.value[3]:
        pad = (0, lfpn2_conv4_3_x.value[3] - lfpn2_lfpn3_up.value[3], 0, lfpn2_conv4_3_x.value[2] - lfpn2_lfpn3_up.value[2])
        lfpn2_lfpn3_up = F.pad(lfpn2_lfpn3_up, pad)
    lfpn2 = self.eltmul(lfpn2_lfpn3_up, lfpn2_conv4_3_x)

    lfpn1_lfpn2_up = self.upsample(self.latlayer1(lfpn2))
    lfpn1_conv3_3_x = self.smooth1(conv3_3_x)
    if lfpn1_lfpn2_up.value[2] != lfpn1_conv3_3_x.value[2] or lfpn1_lfpn2_up.value[3] != lfpn1_conv3_3_x.value[3]:
        pad = (0, lfpn1_conv3_3_x.value[3] - lfpn1_lfpn2_up.value[3], 0, lfpn1_conv3_3_x.value[2] - lfpn1_lfpn2_up.value[2])
        lfpn1_lfpn2_up = F.pad(lfpn1_lfpn2_up, pad)
    lfpn1 = self.eltmul(lfpn1_lfpn2_up, lfpn1_conv3_3_x)

    conv5_3_x = lfpn3
    conv4_3_x = lfpn2
    conv3_3_x = lfpn1

    # fem
    sources = [conv3_3_x, conv4_3_x, conv5_3_x, fc7_x, conv6_2_x, conv7_2_x]
    sources[0] = self.cpm3_3(sources[0])
    sources[1] = self.cpm4_3(sources[1])
    sources[2] = self.cpm5_3(sources[2])
    sources[3] = self.cpm7(sources[3])
    sources[4] = self.cpm6_2(sources[4])
    sources[5] = self.cpm7_2(sources[5])

    # apply multibox head to source layers
    loc = list()
    conf = list()
    for x, l, c in zip(sources, self.loc, self.conf):
        # l(x)
        loc.append(l(x).permute(0, 2, 3, 1))
        # mio: max_in_out
        conf.append(c(x).permute(0, 2, 3, 1))

    # face_conf = flops_counter.cat([flops_counter.view([o[1], o[2], 2], (1, -1)) for o in conf], 1)
    # output = self.softmax(flops_counter.view(face_conf, (1, -1, 2)))
    face_confs = list()
    for o in conf:
        dst = [i for i in o.value]
        dst[-1] = 2
        face_confs.append(flops_counter.TensorSize(dst))
    face_conf = flops_counter.cat([o.view(o.value[0], -1) for o in face_confs], 1)
    output = self.softmax(face_conf.view(face_conf.value[0], -1, 2))

    return output