Exemple #1
0
 def forward(self, x):
     """Classify a batch from the whole image plus its top-N proposed crops.

     Steps: run the backbone once on ``x``, score all anchors with the
     proposal network, keep ``self.topN`` anchors per image via ``hard_nms``,
     crop and resize those regions from the zero-padded input, run the
     backbone on the crops and combine crop features with the image feature.

     Args:
         x: input image batch; it is sliced below as
             ``[batch, channel, y, x]``.

     Returns:
         list: ``[raw_logits, concat_logits, part_logits, top_n_index,
         top_n_prob]`` where ``raw_logits`` is the backbone's first output
         for the full image, ``concat_logits`` is ``self.concat_net`` applied
         to the concatenated crop and image features, ``part_logits`` is
         ``self.partcls_net`` on every crop viewed as ``(batch, topN, -1)``,
         and ``top_n_index`` / ``top_n_prob`` are the selected anchor indices
         and their proposal scores.
     """
     # Single backbone pass over the full image.
     resnet_out, rpn_feature, feature = self.pretrained_model(x)
     pad = self.pad_side
     x_pad = F.pad(x, (pad, pad, pad, pad), mode='constant', value=0)
     batch = x.size(0)

     # rpn_score is indexed below as (batch, nb_anchor). The feature map is
     # detached, so the proposal net's gradients do not reach the backbone.
     rpn_score = self.proposal_net(rpn_feature.detach())

     # One candidate table per image; columns are
     # [score, y0, x0, y1, x1, anchor index].
     all_cdds = [
         np.concatenate(
             (scores.reshape(-1, 1),
              self.edge_anchors.copy(),
              np.arange(0, len(scores)).reshape(-1, 1)),
             axis=1)
         for scores in rpn_score.detach().cpu().numpy()]
     top_n_cdds = np.array(
         [hard_nms(cdds, topn=self.topN, iou_thresh=0.25) for cdds in all_cdds])

     # torch.gather needs an int64 index living on the same device as rpn_score.
     top_n_index = torch.from_numpy(
         top_n_cdds[:, :, -1].astype(np.int64)).to(rpn_score.device)
     top_n_prob = torch.gather(rpn_score, dim=1, index=top_n_index)

     # Crop every selected anchor out of the padded input and resize to 224x224.
     part_imgs = torch.zeros([batch, self.topN, 3, 224, 224], device=x.device)
     for i in range(batch):
         for j in range(self.topN):
             y0, x0, y1, x1 = top_n_cdds[i][j, 1:5].astype(int)
             part_imgs[i:i + 1, j] = F.interpolate(
                 x_pad[i:i + 1, :, y0:y1, x0:x1],
                 size=(224, 224), mode='bilinear', align_corners=True)
     part_imgs = part_imgs.view(batch * self.topN, 3, 224, 224)

     # Crops are detached: no gradient flows back through the cropping step.
     _, _, part_features = self.pretrained_model(part_imgs.detach())

     # Keep only the first CAT_NUM crop features per image for concatenation.
     part_feature = part_features.view(batch, self.topN, -1)
     part_feature = part_feature[:, :CAT_NUM, ...].contiguous()
     part_feature = part_feature.view(batch, -1)

     # concat_logits have the shape: B*200 (per the original note; the class
     # count is set by self.concat_net, not visible here)
     concat_out = torch.cat([part_feature, feature], dim=1)
     concat_logits = self.concat_net(concat_out)
     raw_logits = resnet_out
     # part_logits have the shape: B*N*200 (same caveat as above)
     part_logits = self.partcls_net(part_features).view(batch, self.topN, -1)
     return [raw_logits, concat_logits, part_logits, top_n_index, top_n_prob]
Exemple #2
0
    def forward(self, x):
        """Classify a batch from the whole image plus its top-N proposed crops.

        Args:
            x: input image batch; it is sliced below as
                ``[batch, channel, y, x]``.

        Returns:
            list: ``[raw_logits, concat_logits, part_logits, top_n_index,
            top_n_prob]`` -- the backbone logits for the full image, the
            logits from ``self.concat_net`` on the concatenated crop/image
            features, the per-crop logits from ``self.partcls_net`` viewed as
            ``(batch, topN, -1)``, and the selected anchor indices with their
            proposal scores.
        """
        # The proposal network takes the backbone output (feature maps) as
        # input. Original note: pretrained_model is a ResNet-50 (not
        # verifiable from this method alone).
        resnet_out, rpn_feature, feature = self.pretrained_model(x)

        x_pad = F.pad(
            x, (self.pad_side, self.pad_side, self.pad_side, self.pad_side),
            mode='constant',
            value=0)
        batch = x.size(0)

        # Background note from the original author on RPNs in general:
        #   - generate anchor boxes;
        #   - classify each anchor box as foreground (contains an object) or
        #     background (binary classification);
        #   - bounding-box regression fine-tunes anchor boxes so that positive
        #     anchors move closer to the ground-truth box.
        # In this method the proposal net only yields one score per anchor;
        # rpn_score is indexed below as (batch, nb_anchor).
        # Informativeness score from the Navigator; the feature map is
        # detached so these gradients do not reach the backbone.
        rpn_score = self.proposal_net(rpn_feature.detach())

        # All candidate boxes per image; columns are
        # [score, y0, x0, y1, x1, anchor index].
        all_cdds = [
            np.concatenate(
                (scores.reshape(-1, 1), self.edge_anchors.copy(),
                 np.arange(0, len(scores)).reshape(-1, 1)),
                axis=1) for scores in rpn_score.detach().cpu().numpy()
        ]
        # Index and score of the top proposals for every image in the batch.
        top_n_cdds = np.array([
            hard_nms(cdds, topn=self.topN, iou_thresh=0.25)
            for cdds in all_cdds
        ])
        # torch.gather requires an int64 index tensor.
        top_n_index = top_n_cdds[:, :, -1].astype(np.int64)
        top_n_index = torch.from_numpy(top_n_index).to(device)
        top_n_prob = torch.gather(rpn_score, dim=1, index=top_n_index)

        # Resample each proposed region to 224x224 (original note: extract the
        # important information and reduce computation).
        part_imgs = torch.zeros([batch, self.topN, 3, 224, 224]).to(device)
        for i in range(batch):
            for j in range(self.topN):
                y0, x0, y1, x1 = top_n_cdds[i][j, 1:5].astype(int)
                part_imgs[i:i + 1, j] = F.interpolate(
                    x_pad[i:i + 1, :, y0:y1, x0:x1],
                    size=(224, 224),
                    mode='bilinear',
                    align_corners=True)
        part_imgs = part_imgs.view(batch * self.topN, 3, 224, 224)

        # Fed into the feature extractor to generate the regions' feature
        # vectors (original note: each of length 2048). Crops are detached.
        _, _, part_features = self.pretrained_model(part_imgs.detach())

        # Features of the first CAT_NUM candidates, flattened per image.
        part_feature = part_features.view(batch, self.topN, -1)
        part_feature = part_feature[:, :CAT_NUM, ...].contiguous()
        part_feature = part_feature.view(batch, -1)

        # concat_logits have the shape: B*200 (original note: fully connected
        # layer with 200 outputs; the layer itself is defined elsewhere)
        concat_out = torch.cat([part_feature, feature], dim=1)
        concat_logits = self.concat_net(concat_out)
        raw_logits = resnet_out
        # part_logits have the shape: B*N*200
        # Teacher's output: confidence
        part_logits = self.partcls_net(part_features).view(
            batch, self.topN, -1)
        # top_n = Top K
        return [
            raw_logits, concat_logits, part_logits, top_n_index, top_n_prob
        ]
Exemple #3
0
    def forward(self, x):
        """Classify a batch from the whole image plus its top-N proposed crops.

        Args:
            x: input image batch; it is sliced below as
                ``[batch, channel, y, x]``.

        Returns:
            list: ``[raw_logits, concat_logits, part_logits, top_n_index,
            top_n_prob]`` -- the backbone logits for the full image, the
            logits from ``self.concat_net`` on the concatenated crop/image
            features, the per-crop logits from ``self.partcls_net`` viewed as
            ``(batch, topN, -1)``, and the selected anchor indices with their
            proposal scores.
        """
        resnet_out, rpn_feature, feature = self.pretrained_model(x)
        # Shapes recorded by the original author while debugging:
        #   resnet50 / resnet152 / resnext50 (the latter with BATCH = 32):
        #     resnet_out  = [16, 209]
        #     rpn_feature = [16, 2048, 7, 7]
        #     feature     = [16, 2048]
        #   resnext101 with BATCH = 16:
        #     resnet_out  = [8, 209]
        #     rpn_feature = [8, 2048, 7, 7]
        #     feature     = [8, 2048]

        pad = self.pad_side
        x_pad = F.pad(x, (pad, pad, pad, pad), mode='constant', value=0)
        batch = x.size(0)

        # rpn_score is indexed below as (batch, nb_anchor). The feature map is
        # detached, so the proposal net's gradients do not reach the backbone.
        rpn_score = self.proposal_net(rpn_feature.detach())
        # Recorded shapes: rpn_score = [16, 426] torch.Tensor for resnet50 /
        # resnet152 / resnext50 and [8, 426] for resnext101;
        # self.edge_anchors = (426, 4) numpy.ndarray in every case.

        # One candidate table per image; columns are
        # [score, y0, x0, y1, x1, anchor index].
        all_cdds = [
            np.concatenate(
                (scores.reshape(-1, 1),
                 self.edge_anchors.copy(),
                 np.arange(0, len(scores)).reshape(-1, 1)),
                axis=1)
            for scores in rpn_score.detach().cpu().numpy()]

        top_n_cdds = np.array(
            [hard_nms(cdds, topn=self.topN, iou_thresh=0.25) for cdds in all_cdds])
        # torch.gather needs an int64 index living on the same device as rpn_score.
        top_n_index = torch.from_numpy(
            top_n_cdds[:, :, -1].astype(np.int64)).to(rpn_score.device)
        top_n_prob = torch.gather(rpn_score, dim=1, index=top_n_index)

        # Crop every selected anchor out of the padded input and resize it to
        # PART_IMAGE_SIZE x PART_IMAGE_SIZE.
        part_size = (PART_IMAGE_SIZE, PART_IMAGE_SIZE)
        part_imgs = torch.zeros(
            [batch, self.topN, 3, PART_IMAGE_SIZE, PART_IMAGE_SIZE], device=x.device)
        for i in range(batch):
            for j in range(self.topN):
                y0, x0, y1, x1 = top_n_cdds[i][j, 1:5].astype(int)
                part_imgs[i:i + 1, j] = F.interpolate(
                    x_pad[i:i + 1, :, y0:y1, x0:x1],
                    size=part_size, mode='bilinear', align_corners=True)

        part_imgs = part_imgs.view(batch * self.topN, 3, PART_IMAGE_SIZE, PART_IMAGE_SIZE)
        # Crops are detached: no gradient flows back through the cropping step.
        _, _, part_features = self.pretrained_model(part_imgs.detach())
        # Recorded shapes: part_features = [96, 2048] for resnet50 / resnet152 /
        # resnext50 and [48, 2048] for resnext101.

        # Keep only the first CAT_NUM crop features per image for concatenation.
        part_feature = part_features.view(batch, self.topN, -1)
        part_feature = part_feature[:, :CAT_NUM, ...].contiguous()
        part_feature = part_feature.view(batch, -1)

        # concat_logits have the shape: B*200/209
        concat_out = torch.cat([part_feature, feature], dim=1)
        concat_logits = self.concat_net(concat_out)
        raw_logits = resnet_out

        # part_logits have the shape: B*topN*200/209
        part_logits = self.partcls_net(part_features).view(batch, self.topN, -1)
        return [raw_logits, concat_logits, part_logits, top_n_index, top_n_prob]
Exemple #4
0
    def forward(self, x, img_raw, add=False, return_vis=False):
        """Classify a batch using small- and large-anchor crops.

        The proposal network yields two score maps (small and large anchors).
        ``self.topN // 2`` anchors are kept from each, cropped from the padded
        input, resized to 224x224 and passed through the backbone. A second
        full-image feature is computed from ``x`` or, when ``add`` is true,
        from a region of ``img_raw`` given by the best large anchor.

        Args:
            x: input image batch; it is sliced below as
                ``[batch, channel, y, x]``.
            img_raw: raw image batch, cropped when ``add`` is true and passed
                to ``vis`` when ``return_vis`` is true.
            add: if true, replace each image of the second backbone input by
                a 448x448 resize of the raw-image region under the top large
                anchor.
            return_vis: if true, also return a visualisation and the anchors
                kept for it.

        Returns:
            list: ``[raw_logits, concat_logits, part_logits, top_n_index,
            top_n_prob]``; when ``return_vis`` is true, ``img_vis`` and
            ``anchor_lst`` are appended.
        """
        resnet_out, rpn_feature, feature = self.pretrained_model(x)
        pad = self.pad_side
        x_pad = F.pad(x, (pad, pad, pad, pad), mode='constant', value=0)
        batch = x.size(0)
        half_n = self.topN // 2

        # One proposal pass produces both score maps. The feature map is
        # detached, so the proposal net's gradients do not reach the backbone.
        rpn_score_small, rpn_score_large = self.proposal_net(
            rpn_feature.detach())
        top_n_cdds_small, top_n_index_small, top_n_prob_small = (
            self._top_proposals(rpn_score_small, self.edge_anchors_small,
                                half_n))
        top_n_cdds_large, top_n_index_large, top_n_prob_large = (
            self._top_proposals(rpn_score_large, self.edge_anchors_large,
                                half_n))

        # Small-anchor crops fill slots [0, half_n), large-anchor crops fill
        # [half_n, 2 * half_n); with an odd topN the last slot stays zero.
        part_imgs = torch.zeros([batch, self.topN, 3, 224, 224],
                                device=x.device)
        for i in range(batch):
            for j in range(half_n):
                y0, x0, y1, x1 = top_n_cdds_small[i][j, 1:5].astype(int)
                part_imgs[i:i + 1, j] = F.interpolate(
                    x_pad[i:i + 1, :, y0:y1, x0:x1],
                    size=(224, 224),
                    mode='bilinear',
                    align_corners=True)
                y0, x0, y1, x1 = top_n_cdds_large[i][j, 1:5].astype(int)
                part_imgs[i:i + 1, j + half_n] = F.interpolate(
                    x_pad[i:i + 1, :, y0:y1, x0:x1],
                    size=(224, 224),
                    mode='bilinear',
                    align_corners=True)

        part_imgs = part_imgs.view(batch * self.topN, 3, 224, 224)
        # Crops are detached: no gradient flows back through the cropping step.
        part_out, _, part_features = self.pretrained_model(part_imgs.detach())
        # Keep only the first CAT_NUM crop features per image for concatenation.
        part_feature = part_features.view(batch, self.topN, -1)
        part_feature = part_feature[:, :CAT_NUM, ...].contiguous()
        part_feature = part_feature.view(batch, -1)

        # Second backbone input: a copy of x, optionally replaced per image by
        # the raw-image region under the highest-ranked large anchor.
        x2 = x.clone()
        if add:
            for bs in range(batch):
                y0, x0, y1, x1 = top_n_cdds_large[bs][0, 1:5].astype(int)
                y0, x0, y1, x1 = get_xy(y0, x0, y1, x1)
                # Presumably maps padded-input coordinates (offset 224, extent
                # 448) onto a 600-pixel raw image -- TODO confirm constants.
                y0, x0, y1, x1 = (int((c - 224) / 448 * 600)
                                  for c in (y0, x0, y1, x1))
                x2[bs] = F.interpolate(img_raw[bs:bs + 1, :, y0:y1, x0:x1],
                                       size=(448, 448),
                                       mode='bilinear',
                                       align_corners=True)
        _, _, feature2 = self.pretrained_model(x2.detach())

        top_n_index = torch.cat([top_n_index_small, top_n_index_large], 1)
        top_n_prob = torch.cat([top_n_prob_small, top_n_prob_large], 1)

        if return_vis:
            # Softmax over the two per-crop outputs; subtracting the max keeps
            # exp() from overflowing without changing the result.
            scores = part_out.view(batch, self.topN, 2).detach().cpu().numpy()
            scores = np.exp(scores - scores.max(axis=2, keepdims=True))
            scores = scores / scores.sum(2, keepdims=True)
            top_n_cdds = np.concatenate([top_n_cdds_small, top_n_cdds_large],
                                        1)
            # Re-rank the candidates by the probability of output index 1.
            top_n_cdds[:, :, 0] = scores[:, :, 1]
            top_n_cdds = [
                hard_nms(cdds, topn=2, iou_thresh=0.1) for cdds in top_n_cdds
            ]
            img_vis = vis(img_raw, top_n_cdds)
            anchor_lst = np.array(top_n_cdds)[:, :2]

        # concat_logits have the shape: B*200 (per the original note; the
        # class count is set by self.concat_net, not visible here)
        concat_out = torch.cat([part_feature, feature, feature2], dim=1)
        concat_logits = self.concat_net(concat_out)
        raw_logits = resnet_out
        # part_logits have the shape: B*N*200 (same caveat as above)
        part_logits = self.partcls_net(part_features).view(
            batch, self.topN, -1)

        outputs = [
            raw_logits, concat_logits, part_logits, top_n_index, top_n_prob
        ]
        if return_vis:
            outputs += [img_vis, anchor_lst]
        return outputs

    def _top_proposals(self, rpn_score, edge_anchors, topn, iou_thresh=0.1):
        """Select the ``topn`` anchors per image with ``hard_nms``.

        Returns a tuple ``(cdds, index, prob)``: the kept candidate rows as a
        NumPy array with columns ``[score, y0, x0, y1, x1, anchor index]``,
        the anchor indices as an int64 tensor on ``rpn_score``'s device, and
        the matching scores gathered from ``rpn_score``.
        """
        all_cdds = [
            np.concatenate((scores.reshape(-1, 1), edge_anchors.copy(),
                            np.arange(0, len(scores)).reshape(-1, 1)),
                           axis=1)
            for scores in rpn_score.detach().cpu().numpy()
        ]
        top_cdds = np.array([
            hard_nms(cdds, topn=topn, iou_thresh=iou_thresh)
            for cdds in all_cdds
        ])
        # torch.gather needs an int64 index on the same device as rpn_score.
        top_index = torch.from_numpy(
            top_cdds[:, :, -1].astype(np.int64)).to(rpn_score.device)
        top_prob = torch.gather(rpn_score, dim=1, index=top_index)
        return top_cdds, top_index, top_prob