def sample(self, proposal_bboxes: Tensor, gt_classes: Tensor, gt_bboxes: Tensor) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
    sample_fg_indices = torch.arange(end=len(proposal_bboxes), dtype=torch.long)
    sample_selected_indices = torch.arange(end=len(proposal_bboxes), dtype=torch.long)

    # find labels for each `proposal_bboxes`
    labels = torch.ones(len(proposal_bboxes), dtype=torch.long).cuda() * -1
    ious = BBox.iou(proposal_bboxes, gt_bboxes)
    proposal_max_ious, proposal_assignments = ious.max(dim=1)
    labels[proposal_max_ious < 0.5] = 0
    labels[proposal_max_ious >= 0.5] = gt_classes[proposal_assignments[proposal_max_ious >= 0.5]]

    # select 128 samples
    fg_indices = (labels > 0).nonzero().view(-1)
    bg_indices = (labels == 0).nonzero().view(-1)
    fg_indices = fg_indices[torch.randperm(len(fg_indices))[:min(len(fg_indices), 32)]]
    bg_indices = bg_indices[torch.randperm(len(bg_indices))[:128 - len(fg_indices)]]
    selected_indices = torch.cat([fg_indices, bg_indices])
    selected_indices = selected_indices[torch.randperm(len(selected_indices))]

    proposal_bboxes = proposal_bboxes[selected_indices]
    gt_proposal_transformers = BBox.calc_transformer(proposal_bboxes, gt_bboxes[proposal_assignments[selected_indices]])
    gt_proposal_classes = labels[selected_indices]

    gt_proposal_transformers = (gt_proposal_transformers - self._transformer_normalize_mean) / self._transformer_normalize_std

    gt_proposal_transformers = gt_proposal_transformers.cuda()
    gt_proposal_classes = gt_proposal_classes.cuda()

    sample_fg_indices = sample_fg_indices[fg_indices]
    sample_selected_indices = sample_selected_indices[selected_indices]

    return sample_fg_indices, sample_selected_indices, gt_proposal_classes, gt_proposal_transformers
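Every sampler in these examples leans on `BBox.iou`, which is project-specific. Assuming corner-format `(x1, y1, x2, y2)` float boxes, a minimal sketch of the pairwise IoU matrix it is expected to produce (the batched variants in later examples broadcast an extra leading batch dimension):

import torch
from torch import Tensor

def iou(bboxes1: Tensor, bboxes2: Tensor) -> Tensor:
    # Pairwise IoU: bboxes1 is (N, 4), bboxes2 is (M, 4), result is (N, M).
    lt = torch.max(bboxes1[:, None, :2], bboxes2[None, :, :2])  # intersection top-left
    rb = torch.min(bboxes1[:, None, 2:], bboxes2[None, :, 2:])  # intersection bottom-right
    wh = (rb - lt).clamp(min=0)                                 # zero where boxes are disjoint
    inter = wh[..., 0] * wh[..., 1]
    area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * (bboxes1[:, 3] - bboxes1[:, 1])
    area2 = (bboxes2[:, 2] - bboxes2[:, 0]) * (bboxes2[:, 3] - bboxes2[:, 1])
    return inter / (area1[:, None] + area2[None, :] - inter)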
Example n. 2
def forward(self, features: Tensor, proposal_bboxes: Tensor, gt_classes_batch: Optional[Tensor] = None, gt_bboxes_batch: Optional[Tensor] = None):
    batch_size = features.shape[0]
    '''Assign -1 to all labels (-1 labels are not used for training).
       Compute the IoU of every proposal against the ground truth bboxes;
       take the maximum IoU of each proposal and store the index of its
       best-matching ground truth box in proposal_assignments.
       Assign 0 to all labels whose max IoU is below 0.5, and the matching
       class label to all proposals with IoU of 0.5 or higher:
       foreground (>= 0.5), background (< 0.5).
       Then take a total of 128 proposal samples, split between fg and bg
       in a 0.25 : 0.75 ratio, and reshuffle them.
    '''
    if not self.training:
        proposal_batch_indices = torch.arange(end=batch_size, dtype=torch.long, device=proposal_bboxes.device).view(-1, 1).repeat(1, proposal_bboxes.shape[1])
        pool = Pooler.apply(features, torch.squeeze(proposal_bboxes), torch.squeeze(proposal_batch_indices), self._pooler_mode)
        pool = pool.view(pool.shape[0], -1)
        hidden = self.hidden(pool)
        proposal_classes = self._proposal_class(hidden)
        proposal_transformers = self._proposal_transformer(hidden)
        proposal_classes = proposal_classes.view(batch_size, -1, proposal_classes.shape[-1])
        proposal_transformers = proposal_transformers.view(batch_size, -1, proposal_transformers.shape[-1])
        return proposal_classes, proposal_transformers
    else:
        labels = torch.full((batch_size, proposal_bboxes.shape[1]), -1, dtype=torch.long, device=proposal_bboxes.device)
        ious = BBox.iou(proposal_bboxes, gt_bboxes_batch)
        proposal_max_ious, proposal_assignments = ious.max(dim=2)
        labels[proposal_max_ious < 0.5] = 0
        fg_masks = proposal_max_ious >= 0.5
        if len(fg_masks.nonzero()) > 0:
            labels[fg_masks] = gt_classes_batch[fg_masks.nonzero()[:, 0], proposal_assignments[fg_masks]]
        fg_indices = (labels > 0).nonzero()
        bg_indices = (labels == 0).nonzero()
        fg_indices = fg_indices[torch.randperm(len(fg_indices))[:min(len(fg_indices), 32 * batch_size)]]
        bg_indices = bg_indices[torch.randperm(len(bg_indices))[:128 * batch_size - len(fg_indices)]]
        selected_indices = torch.cat([fg_indices, bg_indices], dim=0)
        '''selected_indices shape: torch.Size([128 * batch_size, 2])'''
        selected_indices = selected_indices[torch.randperm(len(selected_indices))].unbind(dim=1)

        '''
        Assign the ground truth targets of the selected indices,
        apply RoI pooling on the features with the selected proposal_bboxes,
        then flatten the result and pass it through the hidden layer.
        '''
        proposal_bboxes = proposal_bboxes[selected_indices]
        gt_bboxes = gt_bboxes_batch[selected_indices[0], proposal_assignments[selected_indices]]
        gt_proposal_classes = labels[selected_indices]
        diff_x = torch.min(gt_bboxes[:, 2] - gt_bboxes[:, 0])  # smallest gt box width (unused below)
        diff_y = torch.min(gt_bboxes[:, 3] - gt_bboxes[:, 1])  # smallest gt box height (unused below)
        gt_proposal_transformers = BBox.calc_transformer(proposal_bboxes, gt_bboxes)
        batch_indices = selected_indices[0]
        pool = Pooler.apply(features, proposal_bboxes, proposal_batch_indices=batch_indices, mode=self._pooler_mode)
        pool = pool.view(pool.shape[0], -1)
        hidden = self.hidden(pool)
        proposal_classes = self._proposal_class(hidden)
        proposal_transformers = self._proposal_transformer(hidden)
        proposal_class_losses, proposal_transformer_losses = self.loss(proposal_classes, proposal_transformers, gt_proposal_classes, gt_proposal_transformers, batch_size, batch_indices)
        return proposal_classes, proposal_transformers, proposal_class_losses, proposal_transformer_losses
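`BBox.calc_transformer` is likewise project-specific. Assuming it emits the standard R-CNN regression targets `(tx, ty, tw, th)` mapping source boxes onto destination boxes, a sketch:

import torch
from torch import Tensor

def calc_transformer(src_bboxes: Tensor, dst_bboxes: Tensor) -> Tensor:
    # Convert (x1, y1, x2, y2) corners to center/size form.
    src_w = src_bboxes[:, 2] - src_bboxes[:, 0]
    src_h = src_bboxes[:, 3] - src_bboxes[:, 1]
    src_cx = src_bboxes[:, 0] + 0.5 * src_w
    src_cy = src_bboxes[:, 1] + 0.5 * src_h
    dst_w = dst_bboxes[:, 2] - dst_bboxes[:, 0]
    dst_h = dst_bboxes[:, 3] - dst_bboxes[:, 1]
    dst_cx = dst_bboxes[:, 0] + 0.5 * dst_w
    dst_cy = dst_bboxes[:, 1] + 0.5 * dst_h
    # Standard R-CNN parameterization: offsets scaled by the source size,
    # log-space ratios for width and height.
    return torch.stack([
        (dst_cx - src_cx) / src_w,
        (dst_cy - src_cy) / src_h,
        torch.log(dst_w / src_w),
        torch.log(dst_h / src_h),
    ], dim=1)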
Example n. 3
    def sample(self, anchor_bboxes: Tensor, gt_bboxes: Tensor,
               image_width: int,
               image_height: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
        sample_fg_indices = torch.arange(end=len(anchor_bboxes),
                                         dtype=torch.long)
        sample_selected_indices = torch.arange(end=len(anchor_bboxes),
                                               dtype=torch.long)

        anchor_bboxes = anchor_bboxes.cpu()
        gt_bboxes = gt_bboxes.cpu()

        # remove cross-boundary
        boundary = torch.tensor(BBox(0, 0, image_width, image_height).tolist(),
                                dtype=torch.float)
        inside_indices = BBox.inside(
            anchor_bboxes,
            boundary.unsqueeze(dim=0)).squeeze().nonzero().view(-1)

        anchor_bboxes = anchor_bboxes[inside_indices]
        sample_fg_indices = sample_fg_indices[inside_indices]
        sample_selected_indices = sample_selected_indices[inside_indices]

        # find labels for each `anchor_bboxes`
        labels = torch.ones(len(anchor_bboxes), dtype=torch.long) * -1
        ious = BBox.iou(anchor_bboxes, gt_bboxes)
        anchor_max_ious, anchor_assignments = ious.max(dim=1)
        gt_max_ious, gt_assignments = ious.max(dim=0)
        anchor_additions = (ious == gt_max_ious).nonzero()[:, 0]
        labels[anchor_max_ious < 0.3] = 0
        labels[anchor_additions] = 1
        labels[anchor_max_ious >= 0.7] = 1

        # select 256 samples
        fg_indices = (labels == 1).nonzero().view(-1)
        bg_indices = (labels == 0).nonzero().view(-1)
        fg_indices = fg_indices[torch.randperm(
            len(fg_indices))[:min(len(fg_indices), 128)]]
        bg_indices = bg_indices[torch.randperm(
            len(bg_indices))[:256 - len(fg_indices)]]
        selected_indices = torch.cat([fg_indices, bg_indices])
        selected_indices = selected_indices[torch.randperm(
            len(selected_indices))]

        gt_anchor_objectnesses = labels[selected_indices]
        gt_bboxes = gt_bboxes[anchor_assignments[fg_indices]]
        anchor_bboxes = anchor_bboxes[fg_indices]
        gt_anchor_transformers = BBox.calc_transformer(anchor_bboxes,
                                                       gt_bboxes)

        gt_anchor_objectnesses = gt_anchor_objectnesses.cuda()
        gt_anchor_transformers = gt_anchor_transformers.cuda()

        sample_fg_indices = sample_fg_indices[fg_indices]
        sample_selected_indices = sample_selected_indices[selected_indices]

        return sample_fg_indices, sample_selected_indices, gt_anchor_objectnesses, gt_anchor_transformers
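To see the 256-sample selection in isolation, here is a toy run on randomly invented labels (`-1` = ignore, `0` = background, `1` = foreground): foregrounds are capped at 128 and backgrounds fill the remainder.

import torch

labels = torch.randint(low=-1, high=2, size=(6000,))  # invented labels
fg_indices = (labels == 1).nonzero().view(-1)
bg_indices = (labels == 0).nonzero().view(-1)
fg_indices = fg_indices[torch.randperm(len(fg_indices))[:min(len(fg_indices), 128)]]
bg_indices = bg_indices[torch.randperm(len(bg_indices))[:256 - len(fg_indices)]]
selected_indices = torch.cat([fg_indices, bg_indices])
print(len(fg_indices), len(bg_indices), len(selected_indices))  # e.g. 128 128 256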
Example n. 4
        def forward(self, features: Tensor, proposal_bboxes: Tensor,
                    gt_classes_batch: Optional[Tensor] = None, gt_bboxes_batch: Optional[Tensor] = None) -> Union[Tuple[Tensor, Tensor], Tuple[Tensor, Tensor, Tensor, Tensor]]:
            batch_size = features.shape[0]

            if not self.training:
                proposal_batch_indices = torch.arange(end=batch_size, dtype=torch.long, device=proposal_bboxes.device).view(-1, 1).repeat(1, proposal_bboxes.shape[1])
                pool = Pooler.apply(features, proposal_bboxes.view(-1, 4), proposal_batch_indices.view(-1), mode=self._pooler_mode)
                hidden = self.hidden(pool)
                hidden = F.adaptive_max_pool2d(input=hidden, output_size=1)
                hidden = hidden.view(hidden.shape[0], -1)

                proposal_classes = self._proposal_class(hidden)
                proposal_transformers = self._proposal_transformer(hidden)

                proposal_classes = proposal_classes.view(batch_size, -1, proposal_classes.shape[-1])
                proposal_transformers = proposal_transformers.view(batch_size, -1, proposal_transformers.shape[-1])
                return proposal_classes, proposal_transformers
            else:
                # find labels for each `proposal_bboxes`
                labels = torch.full((batch_size, proposal_bboxes.shape[1]), -1, dtype=torch.long, device=proposal_bboxes.device)
                ious = BBox.iou(proposal_bboxes, gt_bboxes_batch)
                proposal_max_ious, proposal_assignments = ious.max(dim=2)
                labels[proposal_max_ious < 0.5] = 0
                fg_masks = proposal_max_ious >= 0.5
                if len(fg_masks.nonzero()) > 0:
                    labels[fg_masks] = gt_classes_batch[fg_masks.nonzero()[:, 0], proposal_assignments[fg_masks]]

                # select 128 x `batch_size` samples
                fg_indices = (labels > 0).nonzero()
                bg_indices = (labels == 0).nonzero()
                fg_indices = fg_indices[torch.randperm(len(fg_indices))[:min(len(fg_indices), 32 * batch_size)]]
                bg_indices = bg_indices[torch.randperm(len(bg_indices))[:128 * batch_size - len(fg_indices)]]
                selected_indices = torch.cat([fg_indices, bg_indices], dim=0)
                selected_indices = selected_indices[torch.randperm(len(selected_indices))].unbind(dim=1)

                proposal_bboxes = proposal_bboxes[selected_indices]
                gt_bboxes = gt_bboxes_batch[selected_indices[0], proposal_assignments[selected_indices]]
                gt_proposal_classes = labels[selected_indices]
                gt_proposal_transformers = BBox.calc_transformer(proposal_bboxes, gt_bboxes)
                batch_indices = selected_indices[0]

                pool = Pooler.apply(features, proposal_bboxes, proposal_batch_indices=batch_indices, mode=self._pooler_mode)
                hidden = self.hidden(pool)
                hidden = F.adaptive_max_pool2d(input=hidden, output_size=1)
                hidden = hidden.view(hidden.shape[0], -1)

                proposal_classes = self._proposal_class(hidden)
                proposal_transformers = self._proposal_transformer(hidden)
                proposal_class_losses, proposal_transformer_losses = self.loss(proposal_classes, proposal_transformers,
                                                                               gt_proposal_classes, gt_proposal_transformers,
                                                                               batch_size, batch_indices)

                return proposal_classes, proposal_transformers, proposal_class_losses, proposal_transformer_losses
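The class-label gather in the training branch pairs each foreground proposal's batch row with its assigned ground-truth column. A toy trace with invented values:

import torch

gt_classes_batch = torch.tensor([[3, 7], [5, 2]])       # (bn=2, gt_n=2)
proposal_assignments = torch.tensor([[1, 0], [0, 0]])    # best gt per proposal
fg_masks = torch.tensor([[True, False], [False, True]])  # proposals with IoU >= 0.5
labels = torch.zeros(2, 2, dtype=torch.long)
# Row index comes from the mask's batch dimension, column index from the assignment.
labels[fg_masks] = gt_classes_batch[fg_masks.nonzero()[:, 0], proposal_assignments[fg_masks]]
print(labels)  # tensor([[7, 0], [0, 5]])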
Example n. 5
    def forward(self, features: Tensor,
                anchor_bboxes: Optional[Tensor] = None, gt_bboxes_batch: Optional[Tensor] = None,
                image_width: Optional[int] = None, image_height: Optional[int] = None) -> Union[Tuple[Tensor, Tensor], Tuple[Tensor, Tensor, Tensor, Tensor]]:
        batch_size = features.shape[0]

        features = self._features(features)
        anchor_objectnesses = self._anchor_objectness(features)
        anchor_transformers = self._anchor_transformer(features)

        anchor_objectnesses = anchor_objectnesses.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
        anchor_transformers = anchor_transformers.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 4)

        if not self.training:
            return anchor_objectnesses, anchor_transformers
        else:
            # remove cross-boundary
            # NOTE: The length of `inside_indices` is guaranteed to be a multiple of `anchor_bboxes.shape[0]` as each batch in `anchor_bboxes` is the same
            inside_indices = BBox.inside(anchor_bboxes, left=0, top=0, right=image_width, bottom=image_height).nonzero().unbind(dim=1)
            inside_anchor_bboxes = anchor_bboxes[inside_indices].view(batch_size, -1, anchor_bboxes.shape[2])
            inside_anchor_objectnesses = anchor_objectnesses[inside_indices].view(batch_size, -1, anchor_objectnesses.shape[2])
            inside_anchor_transformers = anchor_transformers[inside_indices].view(batch_size, -1, anchor_transformers.shape[2])

            # find labels for each `anchor_bboxes`
            labels = torch.full((batch_size, inside_anchor_bboxes.shape[1]), -1, dtype=torch.long, device=inside_anchor_bboxes.device)
            ious = BBox.iou(inside_anchor_bboxes, gt_bboxes_batch)
            anchor_max_ious, anchor_assignments = ious.max(dim=2)
            gt_max_ious, gt_assignments = ious.max(dim=1)
            anchor_additions = ((ious > 0) & (ious == gt_max_ious.unsqueeze(dim=1))).nonzero()[:, :2].unbind(dim=1)
            labels[anchor_max_ious < 0.3] = 0
            labels[anchor_additions] = 1
            labels[anchor_max_ious >= 0.7] = 1

            # select 256 x `batch_size` samples
            fg_indices = (labels == 1).nonzero()
            bg_indices = (labels == 0).nonzero()
            fg_indices = fg_indices[torch.randperm(len(fg_indices))[:min(len(fg_indices), 128 * batch_size)]]
            bg_indices = bg_indices[torch.randperm(len(bg_indices))[:256 * batch_size - len(fg_indices)]]
            selected_indices = torch.cat([fg_indices, bg_indices], dim=0)
            selected_indices = selected_indices[torch.randperm(len(selected_indices))].unbind(dim=1)

            inside_anchor_bboxes = inside_anchor_bboxes[selected_indices]
            gt_bboxes = gt_bboxes_batch[selected_indices[0], anchor_assignments[selected_indices]]
            gt_anchor_objectnesses = labels[selected_indices]
            gt_anchor_transformers = BBox.calc_transformer(inside_anchor_bboxes, gt_bboxes)
            batch_indices = selected_indices[0]

            anchor_objectness_losses, anchor_transformer_losses = self.loss(inside_anchor_objectnesses[selected_indices],
                                                                            inside_anchor_transformers[selected_indices],
                                                                            gt_anchor_objectnesses,
                                                                            gt_anchor_transformers,
                                                                            batch_size, batch_indices)

            return anchor_objectnesses, anchor_transformers, anchor_objectness_losses, anchor_transformer_losses
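The `.nonzero().unbind(dim=1)` idiom that produces `selected_indices` converts a boolean mask into per-dimension index tensors suitable for advanced indexing. A small standalone demonstration:

import torch

x = torch.tensor([[1, 0, 3],
                  [0, 5, 0]])
idx = (x > 0).nonzero()          # (K, 2): one (row, col) pair per nonzero element
rows, cols = idx.unbind(dim=1)   # two 1-D index tensors
print(x[rows, cols])             # tensor([1, 3, 5])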
Example n. 6
    def forward(
        self,
        features: Tensor,
        anchor_bboxes: Optional[Tensor] = None,
        gt_bboxes_batch: Optional[Tensor] = None,
        image_width: Optional[int] = None,
        image_height: Optional[int] = None
    ) -> Union[Tuple[Tensor, Tensor], Tuple[Tensor, Tensor, Tensor, Tensor]]:

        # The input features have h, w equal to h/16, w/16 of the original image,
        # i.e. anchors_y and anchors_x.
        # Below, ga_y, ga_x, ga_n stand for anchors_y, anchors_x, anchors_n.
        batch_size = features.shape[0]

        features = self._features(features)  # (bn,1024,ga_y,ga_x) -> (bn,512,ga_y,ga_x) @ksp(3,1,1)
        anchor_objectnesses = self._anchor_objectness(features)  # (bn,2*9,ga_y,ga_x) @ksp(1,1,0); 9 anchors per location assumed here
        anchor_transformers = self._anchor_transformer(features)  # (bn,4*9,ga_y,ga_x) @ksp(1,1,0)

        # Output the objectness (x2) and coordinates (x4) of every anchor.
        # This resembles YOLO's (1,10647,85), but YOLO's input size is fixed, so its output size is fixed too.
        # YOLO packs objectness and predicted coordinates into one vector; R-CNN here keeps them separate.
        # Because the image size is not fixed, the anchor objectness and coordinate outputs are not fixed either.
        # ga_n = anchors_n = ga_x * ga_y * 9
        anchor_objectnesses = anchor_objectnesses.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2)  # ao@(bn,ga_n,2)
        anchor_transformers = anchor_transformers.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 4)  # at@(bn,ga_n,4)

        # self.training is a member variable of nn.Module.
        # For inference the model was set to eval earlier, which flips training to False.
        if not self.training:
            # At inference time the anchors are of no use and are not passed in.
            return anchor_objectnesses, anchor_transformers
        else:
            # NOTE: the overall training flow is ious -> labels -> bg/fg -> selected_indices

            # remove cross-boundary
            # NOTE: The length of `inside_indices` is guaranteed to be a multiple of `anchor_bboxes.shape[0]` as each batch in `anchor_bboxes` is the same
            ## 1. First filter out the anchors that cross the image boundary
            inside_indices = BBox.inside(anchor_bboxes, left=0, top=0, right=image_width, bottom=image_height).nonzero().unbind(dim=1)
            inside_anchor_bboxes = anchor_bboxes[inside_indices].view(batch_size, -1, anchor_bboxes.shape[2])  # ab@(bn,ga_in,4)
            inside_anchor_objectnesses = anchor_objectnesses[inside_indices].view(batch_size, -1, anchor_objectnesses.shape[2])  # ao@(bn,ga_in,2)
            inside_anchor_transformers = anchor_transformers[inside_indices].view(batch_size, -1, anchor_transformers.shape[2])  # at@(bn,ga_in,4)

            # find labels for each `anchor_bboxes`
            ## 2. Assign a background/foreground label (0 or 1) to all ga_in anchors; the default -1 means ignore
            # Note the first dimension of labels is batch_size, usually 1.
            # labels@(bn,ga_in)
            labels = torch.full((batch_size, inside_anchor_bboxes.shape[1]), -1, dtype=torch.long, device=inside_anchor_bboxes.device)
            # Broadcasting yields (bn,ga_in,gt_n). NOTE: iou is used exactly once in the RPN and once in detection.
            ious = BBox.iou(inside_anchor_bboxes, gt_bboxes_batch)
            # NOTE: computing the IoU of every anchor against all gt_boxes is the key step!
            # In the returned ious, ignore the first dimension (the within-batch index; for batch_size 1 it is always 0):
            # each row then holds the IoUs of one generated anchor against all gt_boxes (one gt_box per column).
            # Where an anchor and a gt_box do not overlap, the IoU is 0. In short, every anchor gets an IoU against every gt_box.

            anchor_max_ious, anchor_assignments = ious.max(dim=2)  # (bn,ga_in)
            gt_max_ious, gt_assignments = ious.max(dim=1)  # (bn,gt_n)

            # The computation below finds the indices of anchors whose label must be set to 1, seen from the gt_box side.
            selector = (ious == gt_max_ious.unsqueeze(dim=1))  # (bn,ga_in,gt_n)
            selector = (ious > 0) & selector  # (bn,ga_in,gt_n)
            selector = selector.nonzero()  # (nonzero_num,3), e.g. (5,3)
            selector = selector[:, :2]  # keep only the first two columns: (nonzero_num,2), e.g. (5,2)
            # unbind splits selector into a 2-element tuple, one element per column above,
            # mainly to enable the multi-dimensional indexing below.
            anchor_additions = selector.unbind(dim=1)

            # For testing:
            # anchor_additions2 = ((ious > 0) & (ious == gt_max_ious.unsqueeze(dim=1))).nonzero()[:, :2].unbind(dim=1)
            # result = (selector == ((ious > 0) & (ious == gt_max_ious.unsqueeze(dim=1))).nonzero()[:, :2]).sum()

            # NOTE: the computed ious determine the anchor labels, the labels determine foreground (1) / background (0),
            # and a batch of samples is then drawn from fg/bg for training:
            # ious@(ab&gb) --> anchor_labels@(0,1,-1) --> fg(1)&bg(0) --> selected_indices

            # Note the assignments below do not cover every label; anything not covered keeps the initial -1:
            # [0, 0.3)   -> 0
            # [0.3, 0.7) -> -1
            # [0.7, 1]   -> 1
            labels[anchor_max_ious < 0.3] = 0  # condition for label 0; not everything that isn't 1 becomes 0 -- some keep the default -1
            labels[anchor_additions] = 1  # anchor_additions is the multi-dimensional index of labels that must be 1
            labels[anchor_max_ious >= 0.7] = 1

            # select `batch_size` x 256 samples
            ## 3. Pick batch_size x 256 samples for training
            fg_indices = (labels == 1).nonzero()  # (fg_n,2); the first column is the batch index, the second indexes into ga_in
            bg_indices = (labels == 0).nonzero()  # (bg_n,2)
            len_fg = len(fg_indices)
            len_bg = len(bg_indices)
            randperm_fg = torch.randperm(len_fg)  # random permutation of all foregrounds (fg_n,)
            randperm_bg = torch.randperm(len_bg)  # random permutation of all backgrounds (bg_n,)

            fg_indices = fg_indices[randperm_fg[:min(len_fg, 128 * batch_size)]]  # chosen foregrounds (sfg_n,2): min(len_fg, 128 * batch_size) of them
            bg_indices = bg_indices[randperm_bg[:256 * batch_size - len_fg]]  # chosen backgrounds (sbg_n,2); in total sfg + sbg = 256

            # NOTE: selected_indices is the key to selecting ao/at and gao/gat below.
            # Similar to building the mask via build_target in YOLOv3, though YOLOv3 is clearly more concise.
            # 3.1 Determine selected_indices
            selected_indices = torch.cat([fg_indices, bg_indices], dim=0)  # merge the random fg and bg @(256,2); first column is the batch, second the index within ga_in
            randperm_selected = torch.randperm(len(selected_indices))  # shuffle once more
            selected_indices = selected_indices[randperm_selected]
            selected_indices = selected_indices.unbind(dim=1)  # split into a 2-element tuple, one per column: ([batch indices], [anchor indices])

            # 3.2 Use selected_indices to determine gao/gat (compute the transforms)
            selected_inside_anchor_bboxes = inside_anchor_bboxes[selected_indices]  # (256,4)
            # selected_indices[0] is the chosen batch; anchor_assignments[selected_indices] picks the matching gt_boxes
            selected_gt_bboxes = gt_bboxes_batch[selected_indices[0], anchor_assignments[selected_indices]]  # (256,4)
            gt_anchor_objectnesses = labels[selected_indices]  # (256,), each either 0 or 1
            gt_anchor_transformers = BBox.calc_transformer(selected_inside_anchor_bboxes, selected_gt_bboxes)  # (256,4): the regression transform from each anchor to its gt

            # 3.3 Use selected_indices to determine ao/at
            inside_anchor_objectnesses = inside_anchor_objectnesses[selected_indices]  # (256,2)
            inside_anchor_transformers = inside_anchor_transformers[selected_indices]  # (256,4)

            # Record the batch index of each selected sample; since batch_size is usually 1, batch_indices is all zeros.
            batch_indices = selected_indices[0]

            # compute the losses between ao,at and gao,gat
            anchor_objectness_losses, anchor_transformer_losses = self.loss(
                inside_anchor_objectnesses, inside_anchor_transformers,
                gt_anchor_objectnesses, gt_anchor_transformers, batch_size,
                batch_indices)
            # ao, at, aol, atl
            return anchor_objectnesses, anchor_transformers, anchor_objectness_losses, anchor_transformer_losses
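The `permute(...).contiguous().view(...)` step annotated above flattens the per-location anchor predictions into one row per anchor. A quick shape check, assuming 9 anchors per feature-map cell (the sizes here are invented):

import torch

batch_size, anchors_per_cell, ga_y, ga_x = 1, 9, 38, 50
raw = torch.randn(batch_size, 2 * anchors_per_cell, ga_y, ga_x)       # objectness logits, (bn, 2*9, ga_y, ga_x)
flat = raw.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2)   # (bn, ga_n, 2)
assert flat.shape == (batch_size, ga_y * ga_x * anchors_per_cell, 2)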
Example n. 7
    def forward(
        self,
        features: Tensor,
        anchor_bboxes: Optional[Tensor] = None,
        gt_bboxes_batch: Optional[Tensor] = None,
        image_width: Optional[int] = None,
        image_height: Optional[int] = None
    ) -> Union[Tuple[Tensor, Tensor], Tuple[Tensor, Tensor, Tensor, Tensor]]:
        batch_size = features.shape[0]
        # forward convolution
        features = self._features(features)
        anchor_objectnesses = self._anchor_objectness(features)
        anchor_transformers = self._anchor_transformer(features)

        anchor_objectnesses = anchor_objectnesses.permute(
            0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
        anchor_transformers = anchor_transformers.permute(
            0, 2, 3, 1).contiguous().view(batch_size, -1, 4)

        if not self.training:
            return anchor_objectnesses, anchor_transformers
        else:
            # BBox.inside yields a boolean mask over anchors fully inside the image;
            # nonzero().unbind turns it into per-dimension index tensors
            inside_indices = BBox.inside(
                anchor_bboxes,
                left=0,
                top=0,
                right=image_width,
                bottom=image_height).nonzero().unbind(dim=1)
            # inside_anchor_bboxes: [batch_size,K(num of inside anchors),4]
            inside_anchor_bboxes = anchor_bboxes[inside_indices].view(
                batch_size, -1, anchor_bboxes.shape[2])

            # inside_anchor_objectnesses: [batch_size,K(num of inside anchors),2]
            # inside_anchor_transformers: [batch_size,K(num of inside anchors),4]
            inside_anchor_objectnesses = anchor_objectnesses[
                inside_indices].view(batch_size, -1,
                                     anchor_objectnesses.shape[2])
            inside_anchor_transformers = anchor_transformers[
                inside_indices].view(batch_size, -1,
                                     anchor_transformers.shape[2])

            # find labels for each `anchor_bboxes`
            # labels: [batch_size,K(num of inside anchors)]
            labels = torch.full((batch_size, inside_anchor_bboxes.shape[1]),
                                -1,
                                dtype=torch.long,
                                device=inside_anchor_bboxes.device)
            # ious: [[batch_size,K(num of inside anchors),N(num of gt_bboxes)]
            ious = BBox.iou(inside_anchor_bboxes, gt_bboxes_batch)
            # get max iou and its indices of each anchor(dim=2)
            anchor_max_ious, anchor_assignments = ious.max(dim=2)
            # get max iou and its indices of each gt_bbox(dim=1)
            gt_max_ious, gt_assignments = ious.max(dim=1)

            # anchors that attain some gt's max IoU, even though that IoU may be below 0.7 (or even 0.3)
            anchor_additions = ((ious > 0) & (ious == gt_max_ious.unsqueeze(
                dim=1))).nonzero()[:, :2].unbind(dim=1)
            # thresholding
            # note that labels are initialized to -1, so anchors with 0.3 <= anchor_max_ious < 0.7 that are not in anchor_additions stay at -1
            labels[anchor_max_ious < 0.3] = 0
            labels[anchor_additions] = 1
            labels[anchor_max_ious >= 0.7] = 1

            # select 256 x `batch_size` samples
            # fg_indices: [N(num of nonzero element),2] value:(which batch,which index)(matrix indices)
            # bg_indices: [N(num of nonzero element),2] value:(which batch,which index)(matrix indices)
            fg_indices = (labels == 1).nonzero()
            bg_indices = (labels == 0).nonzero()

            fg_indices = fg_indices[torch.randperm(
                len(fg_indices))[:min(len(fg_indices), 128 * batch_size)]]
            bg_indices = bg_indices[torch.randperm(
                len(bg_indices))[:256 * batch_size - len(fg_indices)]]
            selected_indices = torch.cat([fg_indices, bg_indices], dim=0)
            selected_indices = selected_indices[torch.randperm(
                len(selected_indices))].unbind(dim=1)

            inside_anchor_bboxes = inside_anchor_bboxes[selected_indices]
            gt_bboxes = gt_bboxes_batch[selected_indices[0],
                                        anchor_assignments[selected_indices]]
            gt_anchor_objectnesses = labels[selected_indices]
            gt_anchor_transformers = BBox.calc_transformer(
                inside_anchor_bboxes, gt_bboxes)

            batch_indices = selected_indices[0]

            # Loss Calculation
            anchor_objectness_losses, anchor_transformer_losses = self.loss(
                inside_anchor_objectnesses[selected_indices],
                inside_anchor_transformers[selected_indices],
                gt_anchor_objectnesses, gt_anchor_transformers, batch_size,
                batch_indices)

            return anchor_objectnesses, anchor_transformers, anchor_objectness_losses, anchor_transformer_losses
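The three-way labeling rule used by both RPN variants above (background below 0.3, ignore in between, foreground from 0.7 up, plus the `anchor_additions` rescue that guarantees each ground truth box recruits its best-overlapping anchor) can be traced on a toy IoU tensor; the numbers here are invented:

import torch

ious = torch.tensor([[[0.10, 0.25],
                      [0.65, 0.05],
                      [0.80, 0.20]]])   # (bn=1, anchors=3, gts=2)
anchor_max_ious, _ = ious.max(dim=2)    # best gt per anchor: [[0.25, 0.65, 0.80]]
gt_max_ious, _ = ious.max(dim=1)        # best anchor per gt: [[0.80, 0.25]]
labels = torch.full((1, 3), -1, dtype=torch.long)
labels[anchor_max_ious < 0.3] = 0       # anchor 0 starts as background
anchor_additions = ((ious > 0) & (ious == gt_max_ious.unsqueeze(dim=1))).nonzero()[:, :2].unbind(dim=1)
labels[anchor_additions] = 1            # anchor 0 is rescued: it is gt 1's best anchor
labels[anchor_max_ious >= 0.7] = 1      # anchor 2 qualifies outright
print(labels)                           # tensor([[ 1, -1,  1]]); anchor 1 stays ignored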
Example n. 8
        def forward(
            self,
            features: Tensor,
            proposal_bboxes: Tensor,
            gt_classes_batch: Optional[Tensor] = None,
            gt_bboxes_batch: Optional[Tensor] = None
        ) -> Union[Tuple[Tensor, Tensor], Tuple[Tensor, Tensor, Tensor,
                                                Tensor]]:

            # features@(bn,1024,h/16,w/16)
            # pb@(bn,gp_n,4)

            batch_size = features.shape[0]

            if not self.training:
                # (bn,gp_n)
                proposal_batch_indices = torch.arange(
                    end=batch_size,
                    dtype=torch.long,
                    device=proposal_bboxes.device).view(-1, 1).repeat(
                        1, proposal_bboxes.shape[1])
                # RoI pooling: (bn*gp_n,1024,7,7). The RoI pooling effectively fuses features with pb.
                pool = Pooler.apply(features,
                                    proposal_bboxes.view(-1, 4),
                                    proposal_batch_indices.view(-1),
                                    mode=self._pooler_mode)

                # (bn*gp_n,2048,4,4): (7,7) becomes (4,4) because hidden contains another stride-2 convolution.
                hidden = self.hidden(pool)
                hidden = F.adaptive_max_pool2d(input=hidden, output_size=1)  # (bn*gp_n,2048,1,1)
                hidden = hidden.view(hidden.shape[0], -1)  # (bn*gp_n,2048)

                # self._proposal_class@Linear(2048,num_cls), self._proposal_transformer@Linear(2048,4*num_cls)
                proposal_classes = self._proposal_class(hidden)  # classification head: (bn*gp_n,num_cls)
                proposal_transformers = self._proposal_transformer(hidden)  # box regression head: (bn*gp_n,num_cls*4)

                proposal_classes = proposal_classes.view(batch_size, -1, proposal_classes.shape[-1])  # (bn,gp_n,num_cls)
                proposal_transformers = proposal_transformers.view(batch_size, -1, proposal_transformers.shape[-1])  # (bn,gp_n,num_cls*4)
                return proposal_classes, proposal_transformers
            else:
                # NOTE: the overall flow mirrors the RPN's training forward: ious -> labels -> fg/bg -> selected_indices.

                # find labels for each `proposal_bboxes`
                # 1. Find a class label for each pb @(bn,gp_n); the default is -1.
                labels = torch.full((batch_size, proposal_bboxes.shape[1]),
                                    -1,
                                    dtype=torch.long,
                                    device=proposal_bboxes.device)
                # ious@(bn,gp_n,gb_n); ious is the key to determining the labels.
                # NOTE: iou is used exactly once in detection.forward and once in rpn.forward.
                ious = BBox.iou(proposal_bboxes, gt_bboxes_batch)
                proposal_max_ious, proposal_assignments = ious.max(dim=2)  # (bn,gp_n)
                labels[proposal_max_ious < 0.5] = 0  # background class
                fg_masks = proposal_max_ious >= 0.5  # (bn,gp_n): the foreground condition
                if len(fg_masks.nonzero()) > 0:
                    # gt_classes_batch@(bn,gt_n): set the class label for each foreground.
                    labels[fg_masks] = gt_classes_batch[
                        fg_masks.nonzero()[:, 0], proposal_assignments[fg_masks]]

                # labels now fall into 3 kinds:
                # a. 0, for a background proposal
                # b. a class label (> 0), for a foreground proposal
                # c. the default -1, which is ignored

                # select 128 x `batch_size` samples
                # 2. Determine selected_indices: pick batch_size x 128 training samples.
                fg_indices = (labels > 0).nonzero()  # (fg_n,2)
                bg_indices = (labels == 0).nonzero()  # (bg_n,2)
                fg_indices = fg_indices[torch.randperm(
                    len(fg_indices))[:min(len(fg_indices), 32 * batch_size)]]  # min(len_fg, 32 * batch_size) foregrounds
                bg_indices = bg_indices[torch.randperm(
                    len(bg_indices))[:128 * batch_size - len(fg_indices)]]
                selected_indices = torch.cat([fg_indices, bg_indices], dim=0)  # (bn*128,2)
                # Split into a 2-element tuple, one element per column: ([batch indices], [proposal indices]).
                selected_indices = selected_indices[torch.randperm(
                    len(selected_indices))].unbind(dim=1)

                proposal_bboxes = proposal_bboxes[selected_indices]  # (bn*128,4)
                # Multi-dimensional indexing into gt_bboxes_batch: selected_indices[0] gives the within-batch
                # image index of each of the 128 samples, and proposal_assignments[selected_indices] the matching
                # gt_box index for each of them, selecting 128 gt_bboxes in total.
                gt_bboxes = gt_bboxes_batch[
                    selected_indices[0],
                    proposal_assignments[selected_indices]]  # (bn*128,4)
                gt_proposal_classes = labels[selected_indices]  # (bn*128,)
                gt_proposal_transformers = BBox.calc_transformer(proposal_bboxes, gt_bboxes)  # (bn*128,4)
                batch_indices = selected_indices[0]  # (bn*128,)

                # features@(bn,1024,w/16,h/16), proposal_bboxes@(bn*128,4)
                # pool@(bn*128,1024,7,7)
                pool = Pooler.apply(features,
                                    proposal_bboxes,
                                    proposal_batch_indices=batch_indices,
                                    mode=self._pooler_mode)
                hidden = self.hidden(pool)  # (bn*128,2048,4,4)
                hidden = F.adaptive_max_pool2d(input=hidden, output_size=1)  # (bn*128,2048,1,1)
                hidden = hidden.view(hidden.shape[0], -1)  # (bn*128,2048)

                # self._proposal_class@Linear(2048,num_cls), self._proposal_transformer@Linear(2048,4*num_cls)
                proposal_classes = self._proposal_class(hidden)  # (bn*128,num_cls)
                proposal_transformers = self._proposal_transformer(hidden)  # (bn*128,num_cls*4)

                proposal_class_losses, proposal_transformer_losses = self.loss(
                    proposal_classes, proposal_transformers,
                    gt_proposal_classes, gt_proposal_transformers, batch_size,
                    batch_indices)

                return proposal_classes, proposal_transformers, proposal_class_losses, proposal_transformer_losses
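`Pooler.apply` is this codebase's own RoI pooling wrapper, so its exact behavior is an assumption here. A rough public-API equivalent is `torchvision.ops.roi_align`, which takes rows of `(batch_index, x1, y1, x2, y2)` in image coordinates plus a `spatial_scale` matching the feature stride:

import torch
from torchvision.ops import roi_align

features = torch.randn(1, 1024, 38, 50)            # (bn, C, h/16, w/16), invented sizes
boxes = torch.tensor([[0., 32., 32., 96., 96.]])   # one RoI: (batch_index, x1, y1, x2, y2)
pool = roi_align(features, boxes, output_size=(7, 7), spatial_scale=1 / 16)
print(pool.shape)                                  # torch.Size([1, 1024, 7, 7])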
Example n. 9
        def forward(
            self,
            features: Tensor,
            proposal_bboxes: Tensor,
            gt_classes_batch: Optional[Tensor] = None,
            gt_bboxes_batch: Optional[Tensor] = None,
            gt_vertices_batch: Optional[Tensor] = None,
            image_width: Optional[int] = None,
            image_height: Optional[int] = None
        ) -> Union[Tuple[Tensor, Tensor, Tensor], Tuple[Tensor, Tensor, Tensor,
                                                        Tensor, Tensor]]:
            batch_size = features.shape[0]

            if not self.training:
                proposal_batch_indices = torch.arange(
                    end=batch_size,
                    dtype=torch.long,
                    device=proposal_bboxes.device).view(-1, 1).repeat(
                        1, proposal_bboxes.shape[1])
                pool = Pooler.apply(features,
                                    proposal_bboxes.view(-1, 4),
                                    proposal_batch_indices.view(-1),
                                    mode=self._pooler_mode)
                pool = pool.view(pool.shape[0], -1)
                hidden = self.hidden[0](pool)
                hidden = self.hidden[1](hidden)
                hidden = self.hidden[3](hidden)
                hidden = self.hidden[4](hidden)
                #hidden = F.adaptive_max_pool2d(input=hidden, output_size=1)
                #hidden = hidden.view(hidden.shape[0], -1)
                proposal_transformers = self._proposal_transformer(hidden)

                if self.iteration:
                    detection_bboxes = self.create_bboxes(
                        proposal_bboxes, proposal_transformers.unsqueeze(0),
                        image_width, image_height, 1)
                    detection_bboxes = detection_bboxes.view(-1, 4)
                    pool = Pooler.apply(features,
                                        detection_bboxes.view(-1, 4),
                                        proposal_batch_indices.view(-1),
                                        mode=self._pooler_mode)
                    pool = pool.view(pool.shape[0], -1)

                    hidden = self.hidden[0](pool)
                    hidden = self.hidden[1](hidden)
                    hidden = self.hidden[3](hidden)
                    hidden = self.hidden[4](hidden)
                    proposal_transformers2 = self._proposal_transformer(hidden)
                    proposal_transformers2 = proposal_transformers2.view(
                        batch_size, -1, proposal_transformers.shape[-1])
                else:
                    proposal_transformers2 = None

                proposal_classes = self._proposal_class(hidden)
                proposal_vertices = self._proposal_vertices(hidden)

                proposal_classes = proposal_classes.view(
                    batch_size, -1, proposal_classes.shape[-1])
                proposal_transformers = proposal_transformers.view(
                    batch_size, -1, proposal_transformers.shape[-1])
                proposal_vertices = proposal_vertices.view(
                    batch_size, -1, proposal_vertices.shape[-1])
                return proposal_vertices, proposal_classes, proposal_transformers, proposal_transformers2
            else:
                labels = torch.full((batch_size, proposal_bboxes.shape[1]),
                                    -1,
                                    dtype=torch.long,
                                    device=proposal_bboxes.device)
                # print(proposal_bboxes.size(), gt_bboxes_batch.size())
                ious = BBox.iou(proposal_bboxes, gt_bboxes_batch)
                #print(proposal_bboxes.size(), gt_bboxes_batch.size(), ious.size())
                proposal_max_ious, proposal_assignments = ious.max(dim=2)
                labels[proposal_max_ious < 0.5] = 0
                fg_masks = proposal_max_ious >= 0.5
                if len(fg_masks.nonzero()) > 0:
                    labels[fg_masks] = gt_classes_batch[
                        fg_masks.nonzero()[:,
                                           0], proposal_assignments[fg_masks]]

                # select 128 x `batch_size` samples
                fg_indices = (labels > 0).nonzero()
                bg_indices = (labels == 0).nonzero()
                fg_indices = fg_indices[torch.randperm(
                    len(fg_indices))[:min(len(fg_indices), 32 * batch_size)]]
                bg_indices = bg_indices[torch.randperm(
                    len(bg_indices))[:128 * batch_size - len(fg_indices)]]
                selected_indices = torch.cat([fg_indices, bg_indices], dim=0)
                selected_indices = selected_indices[torch.randperm(
                    len(selected_indices))].unbind(dim=1)

                proposal_bboxes = proposal_bboxes[selected_indices]
                gt_bboxes = gt_bboxes_batch[
                    selected_indices[0],
                    proposal_assignments[selected_indices]]
                gt_vertices = gt_vertices_batch[
                    selected_indices[0],
                    proposal_assignments[selected_indices]]
                gt_proposal_classes = labels[selected_indices]
                gt_proposal_transformers = BBox.calc_transformer(
                    proposal_bboxes, gt_bboxes)
                batch_indices = selected_indices[0]

                #print('before', gt_proposal_classes)
                #print(gt_proposal_classes.size())

                pool = Pooler.apply(features,
                                    proposal_bboxes,
                                    proposal_batch_indices=batch_indices,
                                    mode=self._pooler_mode)

                #vgg16
                hidden = self.hidden(pool.view(pool.shape[0], -1))

                #resnet101
                # hidden = self.hidden(pool)
                # hidden = F.adaptive_max_pool2d(input=hidden, output_size=1)
                # hidden = hidden.view(hidden.shape[0], -1)

                if self.iteration:
                    proposal_transformers_first_iter = self._proposal_transformer(
                        hidden)
                    detection_bboxes = self.create_bboxes(
                        proposal_bboxes.unsqueeze(0),
                        proposal_transformers_first_iter.unsqueeze(0),
                        image_width, image_height, 1)
                    detection_bboxes = detection_bboxes.view(-1, 4)
                    pool = Pooler.apply(features,
                                        detection_bboxes,
                                        proposal_batch_indices=batch_indices,
                                        mode=self._pooler_mode)
                    hidden = self.hidden(pool.view(pool.shape[0], -1))

                    bboxes_centers = torch.stack(
                        ((detection_bboxes[:, 0] + detection_bboxes[:, 2]) / 2,
                         (detection_bboxes[:, 1] + detection_bboxes[:, 3]) /
                         2),
                        dim=1)
                    width = detection_bboxes[:, 2] - detection_bboxes[:, 0]
                    height = detection_bboxes[:, 3] - detection_bboxes[:, 1]

                    gt_proposal_transformers = BBox.calc_transformer(
                        detection_bboxes, gt_bboxes).detach()
                    for batch_index in range(batch_size):
                        selected_batch_indices = (
                            batch_indices == batch_index).nonzero().view(-1)
                        ious = BBox.iou(
                            detection_bboxes[selected_batch_indices].unsqueeze(
                                0), gt_bboxes_batch[batch_index].unsqueeze(
                                    0)).detach()
                        #print('iter', detection_bboxes.size(), gt_bboxes.size(), ious.size())
                        max_ious, _ = ious.max(dim=2)
                        # print(gt_proposal_classes.size(), max_ious.size())
                        # NOTE: chained indexing like `a[idx][mask] = v` writes into a
                        # temporary copy; index once so the update reaches gt_proposal_classes.
                        gt_proposal_classes[selected_batch_indices[max_ious[0] < 0.5]] = 0
                        gt_proposal_classes[selected_batch_indices[max_ious[0] >= 0.5]] = 1
                    #print('after', gt_proposal_classes)
                    #print(gt_proposal_classes.size())

                    # #if fg_indices.nelement() > 0:
                    # infinites = torch.isinf(gt_proposal_transformers)
                    # if gt_bboxes[gt_bboxes > 0].nelement() > 0 and infinites[infinites == 1].nelement() > 0:
                    #     #print(infinites)
                    #     #print(gt_proposal_transformers)
                    #     # print(infinites.size())
                    #     indices = torch.max(infinites,1)[0]
                    #     #print(indices)
                    #     indices = indices.nonzero().view(-1)

                    #print(indices)
                    #print('gt_proposal_transformers', gt_proposal_transformers[indices])
                    #print('detection_bboxes', detection_bboxes[indices])
                    #print('gt_bboxes', gt_bboxes[indices])
                    #print('ious', ious[0,index], ious.size())
                    #print(ious.size())
                    #print('max_ious', max_ious[0,indices], max_ious.size())
                    #print('gt_proposal_classes', gt_proposal_classes[indices], gt_proposal_classes.size())
                    #     #yo = BBox.calc_transformer(detection_bboxes[index].unsqueeze(0), gt_bboxes[index].unsqueeze(0), print_it=True).detach()

                else:
                    bboxes_centers = torch.stack(
                        ((proposal_bboxes[:, 0] + proposal_bboxes[:, 2]) / 2,
                         (proposal_bboxes[:, 1] + proposal_bboxes[:, 3]) / 2),
                        dim=1)
                    width = proposal_bboxes[:, 2] - proposal_bboxes[:, 0]
                    height = proposal_bboxes[:, 3] - proposal_bboxes[:, 1]

                gt_vertices_norm = torch.empty(gt_vertices.size(),
                                               dtype=torch.float,
                                               device=gt_vertices.device)

                for i in range(gt_vertices_norm.size()[-1]):
                    gt_vertices_norm[:, :, i] = torch.stack(
                        ((gt_vertices[:, 0, i] - bboxes_centers[:, 0]) / width,
                         (gt_vertices[:, 1, i] - bboxes_centers[:, 1]) /
                         height),
                        dim=1)
                gt_vertices_norm = gt_vertices_norm.detach()

                proposal_classes = self._proposal_class(hidden)
                proposal_vertices = self._proposal_vertices(hidden)
                proposal_transformers = self._proposal_transformer(hidden)

                proposal_class_losses, proposal_transformer_losses, vertex_losses = self.loss(
                    proposal_vertices, proposal_classes, proposal_transformers,
                    gt_proposal_classes, gt_proposal_transformers,
                    gt_vertices_norm, batch_size, batch_indices)

                return proposal_vertices, proposal_classes, proposal_transformers, proposal_class_losses, proposal_transformer_losses, vertex_losses
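The per-vertex normalization loop near the end of this example can be collapsed into a single broadcasted expression. A sketch with invented shapes, assuming `gt_vertices` is `(N, 2, V)` with x-coordinates in row 0 and y-coordinates in row 1, as the indexing implies:

import torch

N, V = 4, 8
gt_vertices = torch.randn(N, 2, V)
bboxes_centers = torch.randn(N, 2)
width = torch.rand(N) + 1.0
height = torch.rand(N) + 1.0

centers = bboxes_centers.unsqueeze(dim=2)                      # (N, 2, 1)
sizes = torch.stack([width, height], dim=1).unsqueeze(dim=2)   # (N, 2, 1)
gt_vertices_norm = ((gt_vertices - centers) / sizes).detach()  # matches the loop's output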