def forward(self, x):
    """Classify a batch from the whole image plus its top-scoring crops.

    Pipeline, as implemented below:
      1. Run ``self.pretrained_model`` on ``x``.
      2. Score every anchor with ``self.proposal_net`` (on detached features).
      3. Keep ``self.topN`` anchors per image with ``hard_nms``.
      4. Crop those boxes from the zero-padded input and resize each crop
         to 224x224.
      5. Run ``self.pretrained_model`` again on the (detached) crops.
      6. Classify the concatenated features and every crop on its own.

    Args:
        x: Input image batch, indexed below as (batch, channel, row, col).
            NOTE(review): the crop buffer hard-codes 3 channels - confirm
            that inputs are always 3-channel.

    Returns:
        list: ``[raw_logits, concat_logits, part_logits, top_n_index,
        top_n_prob]`` where

        * ``raw_logits`` is the first output of the backbone for ``x``;
        * ``concat_logits`` is ``self.concat_net`` applied to the features
          of the first ``CAT_NUM`` crops concatenated with the whole-image
          feature;
        * ``part_logits`` is ``self.partcls_net`` applied to every crop,
          viewed as ``(batch, topN, -1)``;
        * ``top_n_index`` is an int64 tensor holding the anchor index of
          every kept proposal;
        * ``top_n_prob`` is ``rpn_score`` gathered at ``top_n_index``.
    """
    # Single backbone pass over the full image.
    resnet_out, rpn_feature, feature = self.pretrained_model(x)

    pad = self.pad_side
    x_pad = F.pad(x, (pad, pad, pad, pad), mode='constant', value=0)
    batch = x.size(0)
    dev = x.device  # follow the input instead of assuming CUDA

    # Original note: rpn is reshaped to batch * nb_anchor.
    # The features are detached, so gradients from the proposal scores do
    # not reach the backbone through this path.
    rpn_score = self.proposal_net(rpn_feature.detach())

    # One candidate table per image; each row is
    # [score, <columns of self.edge_anchors>, anchor_index].
    all_cdds = [
        np.concatenate(
            (scores.reshape(-1, 1),
             self.edge_anchors.copy(),
             np.arange(len(scores)).reshape(-1, 1)),
            axis=1)
        for scores in rpn_score.detach().cpu().numpy()
    ]
    top_n_cdds = np.array(
        [hard_nms(cdds, topn=self.topN, iou_thresh=0.25) for cdds in all_cdds])

    # np.int no longer exists in NumPy >= 1.24; int64 is also the index
    # dtype torch.gather expects.
    top_n_index = torch.from_numpy(
        top_n_cdds[:, :, -1].astype(np.int64)).to(dev)
    top_n_prob = torch.gather(rpn_score, dim=1, index=top_n_index)

    # Columns 1..4 are read as (y0, x0, y1, x1) and used to slice x_pad.
    boxes = top_n_cdds[:, :, 1:5].astype(np.int64)
    part_imgs = torch.zeros(batch, self.topN, 3, 224, 224, device=dev)
    for i in range(batch):
        for j in range(self.topN):
            y0, x0, y1, x1 = boxes[i, j]
            part_imgs[i:i + 1, j] = F.interpolate(
                x_pad[i:i + 1, :, y0:y1, x0:x1],
                size=(224, 224), mode='bilinear', align_corners=True)
    part_imgs = part_imgs.view(batch * self.topN, 3, 224, 224)

    # Second backbone pass, on the crops; only the third output is used.
    _, _, part_features = self.pretrained_model(part_imgs.detach())

    # Keep the first CAT_NUM crops per image and flatten them per sample.
    part_feature = part_features.view(batch, self.topN, -1)
    part_feature = part_feature[:, :CAT_NUM, ...].contiguous()
    part_feature = part_feature.view(batch, -1)

    # Original note: concat_logits have the shape B*200.
    concat_out = torch.cat([part_feature, feature], dim=1)
    concat_logits = self.concat_net(concat_out)
    raw_logits = resnet_out

    # Original note: part_logits have the shape B*N*200.
    part_logits = self.partcls_net(part_features).view(batch, self.topN, -1)

    return [raw_logits, concat_logits, part_logits, top_n_index, top_n_prob]
def forward(self, x):
    """Classify a batch from the whole image plus its top-scoring crops.

    Runs the backbone on ``x``, scores anchors with ``self.proposal_net``,
    keeps ``self.topN`` of them per image via ``hard_nms``, crops and
    resizes those regions to 224x224, runs the backbone on the crops and
    produces three sets of classifier outputs.

    Args:
        x: Input image batch, indexed below as (batch, channel, row, col).
            NOTE(review): the crop buffer hard-codes 3 channels - confirm
            that inputs are always 3-channel.

    Returns:
        list: ``[raw_logits, concat_logits, part_logits, top_n_index,
        top_n_prob]`` where

        * ``raw_logits`` is the first output of the backbone for ``x``;
        * ``concat_logits`` is ``self.concat_net`` applied to the features
          of the first ``CAT_NUM`` crops concatenated with the whole-image
          feature;
        * ``part_logits`` is ``self.partcls_net`` applied to every crop,
          viewed as ``(batch, topN, -1)``;
        * ``top_n_index`` is an int64 tensor holding the anchor index of
          every kept proposal;
        * ``top_n_prob`` is ``rpn_score`` gathered at ``top_n_index``.
    """
    # Original note (translated): the RPN takes the backbone output
    # (VGG16, ResNet, etc.), i.e. the feature maps, as its input;
    # pretrained_model is a resnet50.
    resnet_out, rpn_feature, feature = self.pretrained_model(x)

    pad = self.pad_side
    x_pad = F.pad(x, (pad, pad, pad, pad), mode='constant', value=0)
    batch = x.size(0)
    # Use the input's own device so the gather below can never mix devices.
    dev = x.device

    # Original note: rpn is reshaped to batch * nb_anchor.
    #
    # Original note (translated) - an RPN consists of:
    #   * generating anchor boxes;
    #   * classifying each anchor box as foreground (contains an object)
    #     or background (binary classification);
    #   * bounding-box regression that fine-tunes the anchor boxes so that
    #     positive anchors move closer to the ground-truth boxes.
    # NOTE(review): only a per-anchor score is consumed in this method;
    # no box regression is visible here.

    # Information score for the Navigator (original note).
    rpn_score = self.proposal_net(rpn_feature.detach())

    # All candidate boxes; each row is
    # [score, <columns of self.edge_anchors>, anchor_index].
    all_cdds = [
        np.concatenate(
            (scores.reshape(-1, 1),
             self.edge_anchors.copy(),
             np.arange(len(scores)).reshape(-1, 1)),
            axis=1)
        for scores in rpn_score.detach().cpu().numpy()
    ]

    # Index and probability of the top proposals for every image in the
    # batch (original note, translated).
    top_n_cdds = np.array(
        [hard_nms(cdds, topn=self.topN, iou_thresh=0.25) for cdds in all_cdds])
    # np.int no longer exists in NumPy >= 1.24; int64 is also the index
    # dtype torch.gather expects.
    top_n_index = torch.from_numpy(
        top_n_cdds[:, :, -1].astype(np.int64)).to(dev)
    top_n_prob = torch.gather(rpn_score, dim=1, index=top_n_index)

    # Crop every kept box from the padded input and resize it to 224x224.
    # Original note (translated): "downsample the data, extract the
    # important information, reduce computation".
    # Columns 1..4 are read as (y0, x0, y1, x1).
    boxes = top_n_cdds[:, :, 1:5].astype(np.int64)
    part_imgs = torch.zeros(batch, self.topN, 3, 224, 224, device=dev)
    for i in range(batch):
        for j in range(self.topN):
            y0, x0, y1, x1 = boxes[i, j]
            part_imgs[i:i + 1, j] = F.interpolate(
                x_pad[i:i + 1, :, y0:y1, x0:x1],
                size=(224, 224), mode='bilinear', align_corners=True)
    part_imgs = part_imgs.view(batch * self.topN, 3, 224, 224)

    # Original note: fed into the feature extractor to generate those K
    # regions' feature vectors, each with length 2048.
    _, _, part_features = self.pretrained_model(part_imgs.detach())

    # Features of the top candidates: keep the first CAT_NUM per image and
    # flatten them per sample.
    part_feature = part_features.view(batch, self.topN, -1)
    part_feature = part_feature[:, :CAT_NUM, ...].contiguous()
    part_feature = part_feature.view(batch, -1)

    # Original note: concat_logits have the shape B*200; concat_net is a
    # fully connected layer with 200 outputs (translated).
    concat_out = torch.cat([part_feature, feature], dim=1)
    concat_logits = self.concat_net(concat_out)
    raw_logits = resnet_out

    # Original note: part_logits have the shape B*N*200; this is the
    # "Teacher" output (confidence). top_n = top K.
    part_logits = self.partcls_net(part_features).view(batch, self.topN, -1)

    return [raw_logits, concat_logits, part_logits, top_n_index, top_n_prob]
def forward(self, x):
    """Classify a batch from the whole image plus its top-scoring crops.

    Same pipeline as the classic variant, but the crop size comes from the
    module-level ``PART_IMAGE_SIZE`` instead of a literal: backbone pass on
    ``x``, anchor scoring, ``hard_nms`` selection of ``self.topN`` boxes per
    image, crop + resize, backbone pass on the crops, classification.

    Args:
        x: Input image batch, indexed below as (batch, channel, row, col).
            NOTE(review): the crop buffer hard-codes 3 channels - confirm
            that inputs are always 3-channel.

    Returns:
        list: ``[raw_logits, concat_logits, part_logits, top_n_index,
        top_n_prob]`` where

        * ``raw_logits`` is the first output of the backbone for ``x``;
        * ``concat_logits`` is ``self.concat_net`` applied to the features
          of the first ``CAT_NUM`` crops concatenated with the whole-image
          feature;
        * ``part_logits`` is ``self.partcls_net`` applied to every crop,
          viewed as ``(batch, topN, -1)``;
        * ``top_n_index`` is an int64 tensor holding the anchor index of
          every kept proposal;
        * ``top_n_prob`` is ``rpn_score`` gathered at ``top_n_index``.
    """
    resnet_out, rpn_feature, feature = self.pretrained_model(x)
    # Shapes recorded by the original author (B = images seen per call;
    # B was 16 for resnet50 / resnet152 / resnext50 - the latter with
    # BATCH = 32 - and 8 for resnext101 with BATCH = 16):
    #   resnet_out   (B, 209)
    #   rpn_feature  (B, 2048, 7, 7)
    #   feature      (B, 2048)

    pad = self.pad_side
    x_pad = F.pad(x, (pad, pad, pad, pad), mode='constant', value=0)
    batch = x.size(0)
    dev = x.device  # follow the input instead of assuming CUDA

    # Original note: rpn is reshaped to batch * nb_anchor.
    rpn_score = self.proposal_net(rpn_feature.detach())
    # Recorded shapes: rpn_score (B, 426) torch.Tensor,
    #                  self.edge_anchors (426, 4) numpy.ndarray.

    # One candidate table per image; each row is
    # [score, <columns of self.edge_anchors>, anchor_index].
    all_cdds = [
        np.concatenate(
            (scores.reshape(-1, 1),
             self.edge_anchors.copy(),
             np.arange(len(scores)).reshape(-1, 1)),
            axis=1)
        for scores in rpn_score.detach().cpu().numpy()
    ]
    top_n_cdds = np.array(
        [hard_nms(cdds, topn=self.topN, iou_thresh=0.25) for cdds in all_cdds])

    # np.int no longer exists in NumPy >= 1.24; int64 is also the index
    # dtype torch.gather expects.
    top_n_index = torch.from_numpy(
        top_n_cdds[:, :, -1].astype(np.int64)).to(dev)
    top_n_prob = torch.gather(rpn_score, dim=1, index=top_n_index)

    # Columns 1..4 are read as (y0, x0, y1, x1) and used to slice x_pad.
    boxes = top_n_cdds[:, :, 1:5].astype(np.int64)
    part_size = (PART_IMAGE_SIZE, PART_IMAGE_SIZE)
    part_imgs = torch.zeros(batch, self.topN, 3, *part_size, device=dev)
    for i in range(batch):
        for j in range(self.topN):
            y0, x0, y1, x1 = boxes[i, j]
            part_imgs[i:i + 1, j] = F.interpolate(
                x_pad[i:i + 1, :, y0:y1, x0:x1],
                size=part_size, mode='bilinear', align_corners=True)
    part_imgs = part_imgs.view(batch * self.topN, 3, *part_size)

    # Second backbone pass, on the crops; only the third output is used.
    _, _, part_features = self.pretrained_model(part_imgs.detach())
    # Recorded shapes: part_features (96, 2048) for B = 16,
    #                  (48, 2048) for B = 8.

    # Keep the first CAT_NUM crops per image and flatten them per sample.
    part_feature = part_features.view(batch, self.topN, -1)
    part_feature = part_feature[:, :CAT_NUM, ...].contiguous()
    part_feature = part_feature.view(batch, -1)

    # Original note: concat_logits have the shape B*200/209.
    concat_out = torch.cat([part_feature, feature], dim=1)
    concat_logits = self.concat_net(concat_out)
    raw_logits = resnet_out

    # Original note: part_logits have the shape B*topN*200/209.
    part_logits = self.partcls_net(part_features).view(batch, self.topN, -1)

    return [raw_logits, concat_logits, part_logits, top_n_index, top_n_prob]
def forward(self, x, img_raw, add=False, return_vis=False):
    """Two-scale variant: pick crops from small and large anchors separately.

    ``self.proposal_net`` yields one score tensor for the small anchors and
    one for the large anchors. ``self.topN // 2`` boxes are kept from each
    set, cropped from the zero-padded input, resized to 224x224 and passed
    through the backbone. A third backbone pass over ``x2`` produces an
    additional whole-image feature that is concatenated before
    ``self.concat_net``.

    Args:
        x: Input image batch, indexed below as (batch, channel, row, col).
        img_raw: Second image batch, sliced as (batch, channel, row, col)
            when ``add`` is true and handed to ``vis`` when ``return_vis``
            is true. NOTE(review): presumably the un-resized source images
            (the coordinate rescale uses 600) - confirm with the caller.
        add: When true, ``x2`` is replaced per sample by a 448x448 resize
            of the region of ``img_raw`` given by the best large-anchor
            box; when false, ``x2`` is simply a clone of ``x``.
        return_vis: When true, two extra items (``img_vis``,
            ``anchor_lst``) are appended to the result.

    Returns:
        list: ``[raw_logits, concat_logits, part_logits, top_n_index,
        top_n_prob]``, followed by ``img_vis, anchor_lst`` when
        ``return_vis`` is true. ``top_n_index`` / ``top_n_prob`` hold the
        small-anchor selections first, then the large-anchor ones.
    """
    resnet_out, rpn_feature, feature = self.pretrained_model(x)

    pad = self.pad_side
    x_pad = F.pad(x, (pad, pad, pad, pad), mode='constant', value=0)
    batch = x.size(0)
    half = self.topN // 2
    dev = x.device  # follow the input instead of assuming CUDA

    # One proposal pass yields both score sets. The original ran the
    # network a second time just to re-read the large-anchor scores.
    rpn_score_small, rpn_score_large = self.proposal_net(rpn_feature.detach())

    def _select(rpn_score, anchors):
        """Return (candidates, indices, probabilities) of the kept boxes."""
        # Each candidate row is [score, <anchor columns>, anchor_index].
        all_cdds = [
            np.concatenate(
                (scores.reshape(-1, 1),
                 anchors.copy(),
                 np.arange(len(scores)).reshape(-1, 1)),
                axis=1)
            for scores in rpn_score.detach().cpu().numpy()
        ]
        kept = np.array(
            [hard_nms(cdds, topn=half, iou_thresh=0.1) for cdds in all_cdds])
        # np.int no longer exists in NumPy >= 1.24; int64 is also the index
        # dtype torch.gather expects.
        index = torch.from_numpy(kept[:, :, -1].astype(np.int64)).to(dev)
        prob = torch.gather(rpn_score, dim=1, index=index)
        return kept, index, prob

    top_n_cdds_small, top_n_index_small, top_n_prob_small = _select(
        rpn_score_small, self.edge_anchors_small)
    top_n_cdds_large, top_n_index_large, top_n_prob_large = _select(
        rpn_score_large, self.edge_anchors_large)

    # Crops: slots [0, half) come from small anchors, [half, 2 * half) from
    # large anchors. Columns 1..4 are read as (y0, x0, y1, x1).
    boxes_small = top_n_cdds_small[:, :, 1:5].astype(np.int64)
    boxes_large = top_n_cdds_large[:, :, 1:5].astype(np.int64)
    part_imgs = torch.zeros(batch, self.topN, 3, 224, 224, device=dev)
    for i in range(batch):
        for j in range(half):
            y0, x0, y1, x1 = boxes_small[i, j]
            part_imgs[i:i + 1, j] = F.interpolate(
                x_pad[i:i + 1, :, y0:y1, x0:x1],
                size=(224, 224), mode='bilinear', align_corners=True)
            y0, x0, y1, x1 = boxes_large[i, j]
            part_imgs[i:i + 1, j + half] = F.interpolate(
                x_pad[i:i + 1, :, y0:y1, x0:x1],
                size=(224, 224), mode='bilinear', align_corners=True)
    part_imgs = part_imgs.view(batch * self.topN, 3, 224, 224)

    # The first backbone output for the crops is only consumed by the
    # visualisation branch below.
    part_out, _, part_features = self.pretrained_model(part_imgs.detach())

    # Keep the first CAT_NUM crops per image and flatten them per sample.
    part_feature = part_features.view(batch, self.topN, -1)
    part_feature = part_feature[:, :CAT_NUM, ...].contiguous()
    part_feature = part_feature.view(batch, -1)

    x2 = x.clone()
    if add:
        for bs in range(batch):
            y0, x0, y1, x1 = boxes_large[bs, 0]
            y0, x0, y1, x1 = get_xy(y0, x0, y1, x1)
            # Map box coordinates into img_raw's pixel grid.
            # NOTE(review): presumably 224 is the padding offset, 448 the
            # network input size and 600 the raw image size - confirm.
            y0 = int((y0 - 224) / 448 * 600)
            x0 = int((x0 - 224) / 448 * 600)
            y1 = int((y1 - 224) / 448 * 600)
            x1 = int((x1 - 224) / 448 * 600)
            x2[bs:bs + 1] = F.interpolate(
                img_raw[bs:bs + 1, :, y0:y1, x0:x1],
                size=(448, 448), mode='bilinear', align_corners=True)
    # Computed whether or not `add` is set: concat_net always receives it.
    _, _, feature2 = self.pretrained_model(x2.detach())

    top_n_index = torch.cat([top_n_index_small, top_n_index_large], 1)
    top_n_prob = torch.cat([top_n_prob_small, top_n_prob_large], 1)

    if return_vis:
        # Two-way softmax over the crop outputs; keep the second column.
        part_scores = part_out.view(batch, self.topN, 2).detach().cpu().numpy()
        part_scores = np.exp(part_scores)
        part_scores = part_scores / part_scores.sum(2, keepdims=True)
        part_scores = part_scores[:, :, 1]

        # Re-score every kept box with that probability, then keep the
        # best two per image.
        top_n_cdds = np.concatenate([top_n_cdds_small, top_n_cdds_large], 1)
        for i in range(batch):
            top_n_cdds[i, :, 0] = part_scores[i]
        top_n_cdds = [hard_nms(cdds, topn=2, iou_thresh=0.1)
                      for cdds in top_n_cdds]
        img_vis = vis(img_raw, top_n_cdds)
        anchor_lst = np.array(top_n_cdds)[:, :2]

    # Original note: concat_logits have the shape B*200.
    concat_out = torch.cat([part_feature, feature, feature2], dim=1)
    concat_logits = self.concat_net(concat_out)
    raw_logits = resnet_out  # original note: (resnet_out + att_logits) / 2

    # Original note: part_logits have the shape B*N*200.
    part_logits = self.partcls_net(part_features).view(batch, self.topN, -1)

    if return_vis:
        return [
            raw_logits, concat_logits, part_logits, top_n_index,
            top_n_prob, img_vis, anchor_lst
        ]
    return [raw_logits, concat_logits, part_logits, top_n_index, top_n_prob]