Example 1
    def forward_train(self, img, img_meta, gt_bboxes=None):
        x = self.extract_feat(img)
        featmap_sizes = [featmap.size()[-2:] for featmap in x]
        anchor_list, valid_flag_list = self.rpn_head[0].init_anchors(
            featmap_sizes, img_meta)
        losses = dict()

        for i in range(self.num_stages):
            rpn_train_cfg = self.train_cfg.rpn[i]
            rpn_head = self.rpn_head[i]

            if rpn_head.feat_adapt:
                offset_list = anchor_offset(anchor_list,
                                            rpn_head.anchor_strides,
                                            featmap_sizes)
            else:
                offset_list = None
            x, cls_score, bbox_pred = rpn_head(x, offset_list)
            rpn_loss_inputs = (anchor_list, valid_flag_list, cls_score,
                               bbox_pred, gt_bboxes, img_meta, rpn_train_cfg)
            stage_loss = rpn_head.loss(*rpn_loss_inputs)
            for name, value in stage_loss.items():
                losses['s{}.{}'.format(i, name)] = value

            # refine boxes
            if i < self.num_stages - 1:
                anchor_list = rpn_head.refine_bboxes(anchor_list, bbox_pred,
                                                     img_meta)
        return losses
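Each stage returns its own loss dict; the keys are prefixed with the stage index ('s0.', 's1.', ...) so every stage can coexist in one flat dict. A minimal, self-contained sketch of that naming pattern, with made-up loss names and values (plain Python, not mmdetection code):

    # Toy illustration: flatten per-stage loss dicts into one dict with
    # stage-prefixed keys, as done above with losses['s{}.{}'.format(i, name)].
    stage_losses = [
        {'loss_rpn_cls': 0.40, 'loss_rpn_reg': 0.20},  # stage 0 (dummy values)
        {'loss_rpn_cls': 0.30, 'loss_rpn_reg': 0.10},  # stage 1 (dummy values)
    ]
    losses = dict()
    for i, stage_loss in enumerate(stage_losses):
        for name, value in stage_loss.items():
            losses['s{}.{}'.format(i, name)] = value
    print(losses)
    # {'s0.loss_rpn_cls': 0.4, 's0.loss_rpn_reg': 0.2,
    #  's1.loss_rpn_cls': 0.3, 's1.loss_rpn_reg': 0.1}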
Example 2
    def simple_test(self, img, img_meta, proposals=None, rescale=False):
        """Test without augmentation."""
        assert self.with_bbox, "Bbox head must be implemented."

        x = self.extract_feat(img)

        if self.with_rpn:
            proposal_list = self.simple_test_rpn(x, img_meta,
                                                 self.test_cfg.rpn)
        elif self.with_cascade_rpn:
            # cascade RPN: run each RPN stage, refining the anchors between
            # stages, then take proposals from the last stage's predictions
            rpn_feat = x
            featmap_sizes = [featmap.size()[-2:] for featmap in rpn_feat]
            anchor_list, _ = self.rpn_head[0].init_anchors(
                featmap_sizes, img_meta)

            for i in range(self.num_rpn_stages):
                rpn_head = self.rpn_head[i]
                if rpn_head.feat_adapt:
                    offset_list = anchor_offset(anchor_list,
                                                rpn_head.anchor_strides,
                                                featmap_sizes)
                else:
                    offset_list = None
                rpn_feat, cls_score, bbox_pred = rpn_head(
                    rpn_feat, offset_list)
                if i < self.num_rpn_stages - 1:
                    anchor_list = rpn_head.refine_bboxes(
                        anchor_list, bbox_pred, img_meta)

            proposal_list = self.rpn_head[-1].get_bboxes(
                anchor_list, cls_score, bbox_pred, img_meta, self.test_cfg.rpn)
        else:
            proposal_list = proposals

        det_bboxes, det_labels = self.simple_test_bboxes(x,
                                                         img_meta,
                                                         proposal_list,
                                                         self.test_cfg.rcnn,
                                                         rescale=rescale)
        bbox_results = bbox2result(det_bboxes, det_labels,
                                   self.bbox_head.num_classes)

        if not self.with_mask:
            return bbox_results
        else:
            segm_results = self.simple_test_mask(x,
                                                 img_meta,
                                                 det_bboxes,
                                                 det_labels,
                                                 rescale=rescale)
            return bbox_results, segm_results
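The cascade RPN branch starts from featmap_sizes = [featmap.size()[-2:] for featmap in x], i.e. the spatial (H, W) of every feature level. A standalone sketch with random tensors standing in for real backbone/FPN features (shapes here are arbitrary):

    import torch

    # Stand-ins for multi-level feature maps of shape (N, C, H, W).
    x = [torch.randn(1, 256, 64, 64),
         torch.randn(1, 256, 32, 32),
         torch.randn(1, 256, 16, 16)]
    featmap_sizes = [featmap.size()[-2:] for featmap in x]
    print(featmap_sizes)
    # [torch.Size([64, 64]), torch.Size([32, 32]), torch.Size([16, 16])]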
Example 3
    def common_test(self, x, img_meta):
        featmap_sizes = [featmap.size()[-2:] for featmap in x]
        anchor_list, _ = self.rpn_head[0].init_anchors(featmap_sizes, img_meta)

        for i in range(self.num_stages):
            rpn_head = self.rpn_head[i]
            if rpn_head.feat_adapt:
                offset_list = anchor_offset(anchor_list,
                                            rpn_head.anchor_strides,
                                            featmap_sizes)
            else:
                offset_list = None
            x, cls_score, bbox_pred = rpn_head(x, offset_list)
            if i < self.num_stages - 1:
                anchor_list = rpn_head.refine_bboxes(anchor_list, bbox_pred,
                                                     img_meta)

        proposal_list = self.rpn_head[-1].get_bboxes(anchor_list, cls_score,
                                                     bbox_pred, img_meta,
                                                     self.test_cfg.rpn)
        return proposal_list
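These snippets index self.rpn_head like a list ([0], [i], [-1]). Assuming the stage heads are stored in a torch.nn.ModuleList (an assumption about the surrounding class, which is not shown here), that indexing, including the negative index for the last stage, behaves as it would for a plain list:

    import torch.nn as nn

    # Hypothetical per-stage container; Linear layers stand in for RPN heads.
    rpn_head = nn.ModuleList([nn.Linear(8, 4) for _ in range(3)])
    first, last = rpn_head[0], rpn_head[-1]  # negative indexing picks the last stage
    print(len(rpn_head), last is rpn_head[2])  # 3 True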
Example 4
    def simple_test(self, img, img_meta, proposals=None, rescale=False):
        x = self.extract_feat(img)
        if self.with_rpn:
            proposal_list = self.simple_test_rpn(x, img_meta,
                                                 self.test_cfg.rpn)
        elif self.with_cascade_rpn:
            # cascade RPN: run each RPN stage, refining the anchors between
            # stages, then take proposals from the last stage's predictions
            rpn_feat = x
            featmap_sizes = [featmap.size()[-2:] for featmap in rpn_feat]
            anchor_list, _ = self.rpn_head[0].init_anchors(
                featmap_sizes, img_meta)

            for i in range(self.num_rpn_stages):
                rpn_head = self.rpn_head[i]
                if rpn_head.feat_adapt:
                    offset_list = anchor_offset(anchor_list,
                                                rpn_head.anchor_strides,
                                                featmap_sizes)
                else:
                    offset_list = None
                rpn_feat, cls_score, bbox_pred = rpn_head(
                    rpn_feat, offset_list)
                if i < self.num_rpn_stages - 1:
                    anchor_list = rpn_head.refine_bboxes(
                        anchor_list, bbox_pred, img_meta)

            proposal_list = self.rpn_head[-1].get_bboxes(
                anchor_list, cls_score, bbox_pred, img_meta, self.test_cfg.rpn)
        else:
            proposal_list = proposals

        img_shape = img_meta[0]['img_shape']
        ori_shape = img_meta[0]['ori_shape']
        scale_factor = img_meta[0]['scale_factor']

        # "ms" in variable names means multi-stage
        ms_bbox_result = {}
        ms_segm_result = {}
        ms_scores = []
        rcnn_test_cfg = self.test_cfg.rcnn

        rois = bbox2roi(proposal_list)
        for i in range(self.num_stages):
            bbox_roi_extractor = self.bbox_roi_extractor[i]
            bbox_head = self.bbox_head[i]

            bbox_feats = bbox_roi_extractor(
                x[:len(bbox_roi_extractor.featmap_strides)], rois)
            if self.with_shared_head:
                bbox_feats = self.shared_head(bbox_feats)

            cls_score, bbox_pred = bbox_head(bbox_feats)
            ms_scores.append(cls_score)

            if self.test_cfg.keep_all_stages:
                det_bboxes, det_labels = bbox_head.get_det_bboxes(
                    rois,
                    cls_score,
                    bbox_pred,
                    img_shape,
                    scale_factor,
                    rescale=rescale,
                    cfg=rcnn_test_cfg)
                bbox_result = bbox2result(det_bboxes, det_labels,
                                          bbox_head.num_classes)
                ms_bbox_result['stage{}'.format(i)] = bbox_result

                if self.with_mask:
                    mask_roi_extractor = self.mask_roi_extractor[i]
                    mask_head = self.mask_head[i]
                    if det_bboxes.shape[0] == 0:
                        mask_classes = mask_head.num_classes - 1
                        segm_result = [[] for _ in range(mask_classes)]
                    else:
                        _bboxes = (
                            det_bboxes[:, :4] *
                            scale_factor if rescale else det_bboxes)
                        mask_rois = bbox2roi([_bboxes])
                        mask_feats = mask_roi_extractor(
                            x[:len(mask_roi_extractor.featmap_strides)],
                            mask_rois)
                        if self.with_shared_head:
                            mask_feats = self.shared_head(mask_feats)
                        mask_pred = mask_head(mask_feats)
                        segm_result = mask_head.get_seg_masks(
                            mask_pred, _bboxes, det_labels, rcnn_test_cfg,
                            ori_shape, scale_factor, rescale)
                    ms_segm_result['stage{}'.format(i)] = segm_result

            if i < self.num_stages - 1:
                bbox_label = cls_score.argmax(dim=1)
                rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred,
                                                  img_meta[0])

        cls_score = sum(ms_scores) / self.num_stages
        det_bboxes, det_labels = self.bbox_head[-1].get_det_bboxes(
            rois,
            cls_score,
            bbox_pred,
            img_shape,
            scale_factor,
            rescale=rescale,
            cfg=rcnn_test_cfg)
        bbox_result = bbox2result(det_bboxes, det_labels,
                                  self.bbox_head[-1].num_classes)
        ms_bbox_result['ensemble'] = bbox_result

        if self.with_mask:
            if det_bboxes.shape[0] == 0:
                mask_classes = self.mask_head[-1].num_classes - 1
                segm_result = [[] for _ in range(mask_classes)]
            else:
                if isinstance(scale_factor, float):  # aspect ratio fixed
                    _bboxes = (
                        det_bboxes[:, :4] *
                        scale_factor if rescale else det_bboxes)
                else:
                    _bboxes = (
                        det_bboxes[:, :4] *
                        torch.from_numpy(scale_factor).to(det_bboxes.device)
                        if rescale else det_bboxes)

                mask_rois = bbox2roi([_bboxes])
                aug_masks = []
                for i in range(self.num_stages):
                    mask_roi_extractor = self.mask_roi_extractor[i]
                    mask_feats = mask_roi_extractor(
                        x[:len(mask_roi_extractor.featmap_strides)], mask_rois)
                    if self.with_shared_head:
                        mask_feats = self.shared_head(mask_feats)
                    mask_pred = self.mask_head[i](mask_feats)
                    aug_masks.append(mask_pred.sigmoid().cpu().numpy())
                merged_masks = merge_aug_masks(aug_masks,
                                               [img_meta] * self.num_stages,
                                               self.test_cfg.rcnn)
                segm_result = self.mask_head[-1].get_seg_masks(
                    merged_masks, _bboxes, det_labels, rcnn_test_cfg,
                    ori_shape, scale_factor, rescale)
            ms_segm_result['ensemble'] = segm_result

        if not self.test_cfg.keep_all_stages:
            if self.with_mask:
                results = (ms_bbox_result['ensemble'],
                           ms_segm_result['ensemble'])
            else:
                results = ms_bbox_result['ensemble']
        else:
            if self.with_mask:
                results = {
                    stage: (ms_bbox_result[stage], ms_segm_result[stage])
                    for stage in ms_bbox_result
                }
            else:
                results = ms_bbox_result

        return results
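When rescale=True the detections come back in original-image coordinates, so the mask branch first maps them back to the resized input by multiplying the coordinate columns with scale_factor (a plain float when the aspect ratio is preserved, otherwise a per-coordinate numpy array), dropping the score column so the RoIs line up with the feature maps. A self-contained sketch of just that arithmetic with dummy boxes:

    import numpy as np
    import torch

    det_bboxes = torch.tensor([[10., 20., 50., 80., 0.9],   # x1, y1, x2, y2, score (dummy)
                               [30., 30., 60., 90., 0.7]])
    rescale = True
    scale_factor = np.array([2., 2., 2., 2.], dtype=np.float32)  # per-coordinate factors

    if isinstance(scale_factor, float):  # aspect ratio fixed
        _bboxes = det_bboxes[:, :4] * scale_factor if rescale else det_bboxes
    else:
        _bboxes = (det_bboxes[:, :4] *
                   torch.from_numpy(scale_factor).to(det_bboxes.device)
                   if rescale else det_bboxes)
    print(_bboxes)  # coordinate columns scaled; score column dropped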
Example 5
    def forward_train(self,
                      img,
                      img_meta,
                      gt_bboxes,
                      gt_labels,
                      gt_bboxes_ignore=None,
                      gt_masks=None,
                      proposals=None):
        x = self.extract_feat(img)

        losses = dict()

        if self.with_rpn:
            rpn_outs = self.rpn_head(x)
            rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta,
                                          self.train_cfg.rpn)
            rpn_losses = self.rpn_head.loss(
                *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
            losses.update(rpn_losses)

            proposal_cfg = self.train_cfg.get('rpn_proposal',
                                              self.test_cfg.rpn)
            proposal_inputs = rpn_outs + (img_meta, proposal_cfg)
            proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
        elif self.with_cascade_rpn:
            # cascade RPN: run each RPN stage, accumulate its stage-prefixed
            # losses, refine the anchors, and take proposals from the last stage
            rpn_feat = x
            featmap_sizes = [featmap.size()[-2:] for featmap in rpn_feat]
            anchor_list, valid_flag_list = self.rpn_head[0].init_anchors(
                featmap_sizes, img_meta)

            for i in range(self.num_rpn_stages):
                rpn_train_cfg = self.train_cfg.rpn[i]
                rpn_head = self.rpn_head[i]

                if rpn_head.feat_adapt:
                    offset_list = anchor_offset(anchor_list,
                                                rpn_head.anchor_strides,
                                                featmap_sizes)
                else:
                    offset_list = None
                rpn_feat, cls_score, bbox_pred = rpn_head(
                    rpn_feat, offset_list)
                rpn_loss_inputs = (anchor_list, valid_flag_list, cls_score,
                                   bbox_pred, gt_bboxes, img_meta,
                                   rpn_train_cfg)
                stage_loss = rpn_head.loss(*rpn_loss_inputs)
                for name, value in stage_loss.items():
                    losses['s{}.{}'.format(i, name)] = value

                # refine boxes
                if i < self.num_rpn_stages - 1:
                    anchor_list = rpn_head.refine_bboxes(
                        anchor_list, bbox_pred, img_meta)
            proposal_list = self.rpn_head[-1].get_bboxes(
                anchor_list, cls_score, bbox_pred, img_meta, self.test_cfg.rpn)
        else:
            proposal_list = proposals

        for i in range(self.num_stages):
            self.current_stage = i
            rcnn_train_cfg = self.train_cfg.rcnn[i]
            lw = self.train_cfg.stage_loss_weights[i]

            # assign gts and sample proposals
            sampling_results = []
            if self.with_bbox or self.with_mask:
                bbox_assigner = build_assigner(rcnn_train_cfg.assigner)
                bbox_sampler = build_sampler(
                    rcnn_train_cfg.sampler, context=self)
                num_imgs = img.size(0)
                if gt_bboxes_ignore is None:
                    gt_bboxes_ignore = [None for _ in range(num_imgs)]

                for j in range(num_imgs):
                    assign_result = bbox_assigner.assign(
                        proposal_list[j], gt_bboxes[j], gt_bboxes_ignore[j],
                        gt_labels[j])
                    sampling_result = bbox_sampler.sample(
                        assign_result,
                        proposal_list[j],
                        gt_bboxes[j],
                        gt_labels[j],
                        feats=[lvl_feat[j][None] for lvl_feat in x])
                    sampling_results.append(sampling_result)

            # bbox head forward and loss
            bbox_roi_extractor = self.bbox_roi_extractor[i]
            bbox_head = self.bbox_head[i]

            rois = bbox2roi([res.bboxes for res in sampling_results])
            bbox_feats = bbox_roi_extractor(x[:bbox_roi_extractor.num_inputs],
                                            rois)
            if self.with_shared_head:
                bbox_feats = self.shared_head(bbox_feats)
            cls_score, bbox_pred = bbox_head(bbox_feats)

            bbox_targets = bbox_head.get_target(sampling_results, gt_bboxes,
                                                gt_labels, rcnn_train_cfg)
            loss_bbox = bbox_head.loss(cls_score, bbox_pred, *bbox_targets)
            for name, value in loss_bbox.items():
                losses['s{}.{}'.format(i, name)] = (
                    value * lw if 'loss' in name else value)

            # mask head forward and loss
            if self.with_mask:
                if not self.share_roi_extractor:
                    mask_roi_extractor = self.mask_roi_extractor[i]
                    pos_rois = bbox2roi(
                        [res.pos_bboxes for res in sampling_results])
                    mask_feats = mask_roi_extractor(
                        x[:mask_roi_extractor.num_inputs], pos_rois)
                    if self.with_shared_head:
                        mask_feats = self.shared_head(mask_feats)
                else:
                    # reuse positive bbox feats
                    pos_inds = []
                    device = bbox_feats.device
                    for res in sampling_results:
                        pos_inds.append(
                            torch.ones(
                                res.pos_bboxes.shape[0],
                                device=device,
                                dtype=torch.uint8))
                        pos_inds.append(
                            torch.zeros(
                                res.neg_bboxes.shape[0],
                                device=device,
                                dtype=torch.uint8))
                    pos_inds = torch.cat(pos_inds)
                    mask_feats = bbox_feats[pos_inds]
                mask_head = self.mask_head[i]
                mask_pred = mask_head(mask_feats)
                mask_targets = mask_head.get_target(sampling_results, gt_masks,
                                                    rcnn_train_cfg)
                pos_labels = torch.cat(
                    [res.pos_gt_labels for res in sampling_results])
                loss_mask = mask_head.loss(mask_pred, mask_targets, pos_labels)
                for name, value in loss_mask.items():
                    losses['s{}.{}'.format(i, name)] = (
                        value * lw if 'loss' in name else value)

            # refine bboxes
            if i < self.num_stages - 1:
                pos_is_gts = [res.pos_is_gt for res in sampling_results]
                roi_labels = bbox_targets[0]  # bbox_targets is a tuple
                with torch.no_grad():
                    proposal_list = bbox_head.refine_bboxes(
                        rois, roi_labels, bbox_pred, pos_is_gts, img_meta)

        return losses
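With a shared RoI extractor, the mask branch reuses bbox_feats and keeps only the positive samples by concatenating a ones/zeros mask per image and indexing with it. A standalone sketch of that selection with dummy sample counts (torch.bool is used here; the code above uses torch.uint8, which older PyTorch also accepts as a mask):

    import torch

    # Pooled features for 7 sampled RoIs (2 pos + 3 neg from image 0,
    # 1 pos + 1 neg from image 1); all sizes here are made up.
    bbox_feats = torch.randn(7, 256, 7, 7)
    sample_counts = [(2, 3), (1, 1)]  # per-image (num_pos, num_neg)

    pos_inds = []
    for num_pos, num_neg in sample_counts:
        pos_inds.append(torch.ones(num_pos, dtype=torch.bool))
        pos_inds.append(torch.zeros(num_neg, dtype=torch.bool))
    pos_inds = torch.cat(pos_inds)

    mask_feats = bbox_feats[pos_inds]
    print(mask_feats.shape)  # torch.Size([3, 256, 7, 7]) -> positives only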
Example 6
    def forward_train(self,
                      img,
                      img_meta,
                      gt_bboxes,
                      gt_labels,
                      gt_bboxes_ignore=None,
                      gt_masks=None,
                      proposals=None):
        x = self.extract_feat(img)

        losses = dict()

        # RPN forward and loss
        if self.with_rpn:
            rpn_outs = self.rpn_head(x)
            rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta,
                                          self.train_cfg.rpn)
            rpn_losses = self.rpn_head.loss(*rpn_loss_inputs,
                                            gt_bboxes_ignore=gt_bboxes_ignore)
            losses.update(rpn_losses)

            proposal_cfg = self.train_cfg.get('rpn_proposal',
                                              self.test_cfg.rpn)
            proposal_inputs = rpn_outs + (img_meta, proposal_cfg)
            proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)

        elif self.with_cascade_rpn:
            # cascade RPN: run each RPN stage, accumulate its stage-prefixed
            # losses, refine the anchors, and take proposals from the last stage
            rpn_feat = x
            featmap_sizes = [featmap.size()[-2:] for featmap in rpn_feat]
            anchor_list, valid_flag_list = self.rpn_head[0].init_anchors(
                featmap_sizes, img_meta)

            for i in range(self.num_rpn_stages):
                rpn_train_cfg = self.train_cfg.rpn[i]
                rpn_head = self.rpn_head[i]

                if rpn_head.feat_adapt:
                    offset_list = anchor_offset(anchor_list,
                                                rpn_head.anchor_strides,
                                                featmap_sizes)
                else:
                    offset_list = None
                rpn_feat, cls_score, bbox_pred = rpn_head(
                    rpn_feat, offset_list)
                rpn_loss_inputs = (anchor_list, valid_flag_list, cls_score,
                                   bbox_pred, gt_bboxes, img_meta,
                                   rpn_train_cfg)
                stage_loss = rpn_head.loss(*rpn_loss_inputs)
                for name, value in stage_loss.items():
                    losses['s{}.{}'.format(i, name)] = value

                # refine boxes
                if i < self.num_rpn_stages - 1:
                    anchor_list = rpn_head.refine_bboxes(
                        anchor_list, bbox_pred, img_meta)
            proposal_list = self.rpn_head[-1].get_bboxes(
                anchor_list, cls_score, bbox_pred, img_meta, self.test_cfg.rpn)

        else:
            proposal_list = proposals

        # assign gts and sample proposals
        if self.with_bbox or self.with_mask:
            bbox_assigner = build_assigner(self.train_cfg.rcnn.assigner)
            bbox_sampler = build_sampler(self.train_cfg.rcnn.sampler,
                                         context=self)
            num_imgs = img.size(0)
            if gt_bboxes_ignore is None:
                gt_bboxes_ignore = [None for _ in range(num_imgs)]
            sampling_results = []
            for i in range(num_imgs):
                assign_result = bbox_assigner.assign(proposal_list[i],
                                                     gt_bboxes[i],
                                                     gt_bboxes_ignore[i],
                                                     gt_labels[i])
                sampling_result = bbox_sampler.sample(
                    assign_result,
                    proposal_list[i],
                    gt_bboxes[i],
                    gt_labels[i],
                    feats=[lvl_feat[i][None] for lvl_feat in x])
                sampling_results.append(sampling_result)

        # bbox head forward and loss
        if self.with_bbox:
            rois = bbox2roi([res.bboxes for res in sampling_results])
            # TODO: a more flexible way to decide which feature maps to use
            bbox_feats = self.bbox_roi_extractor(
                x[:self.bbox_roi_extractor.num_inputs], rois)
            if self.with_shared_head:
                bbox_feats = self.shared_head(bbox_feats)
            cls_score, bbox_pred = self.bbox_head(bbox_feats)

            bbox_targets = self.bbox_head.get_target(sampling_results,
                                                     gt_bboxes, gt_labels,
                                                     self.train_cfg.rcnn)
            loss_bbox = self.bbox_head.loss(cls_score, bbox_pred,
                                            *bbox_targets)
            losses.update(loss_bbox)

        # mask head forward and loss
        if self.with_mask:
            if not self.share_roi_extractor:
                pos_rois = bbox2roi(
                    [res.pos_bboxes for res in sampling_results])
                mask_feats = self.mask_roi_extractor(
                    x[:self.mask_roi_extractor.num_inputs], pos_rois)
                if self.with_shared_head:
                    mask_feats = self.shared_head(mask_feats)
            else:
                pos_inds = []
                device = bbox_feats.device
                for res in sampling_results:
                    pos_inds.append(
                        torch.ones(res.pos_bboxes.shape[0],
                                   device=device,
                                   dtype=torch.uint8))
                    pos_inds.append(
                        torch.zeros(res.neg_bboxes.shape[0],
                                    device=device,
                                    dtype=torch.uint8))
                pos_inds = torch.cat(pos_inds)
                mask_feats = bbox_feats[pos_inds]
            mask_pred = self.mask_head(mask_feats)

            mask_targets = self.mask_head.get_target(sampling_results,
                                                     gt_masks,
                                                     self.train_cfg.rcnn)
            pos_labels = torch.cat(
                [res.pos_gt_labels for res in sampling_results])
            loss_mask = self.mask_head.loss(mask_pred, mask_targets,
                                            pos_labels)
            losses.update(loss_mask)

        return losses
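A small Python idiom both training functions rely on: the RPN head's outputs form a tuple, extra arguments are appended by tuple concatenation, and the result is star-unpacked into loss() and get_bboxes(). A minimal illustration with placeholder values, nothing mmdetection-specific:

    # Placeholder standing in for rpn_head.loss(); it ignores its inputs and
    # returns dummy loss values.
    def loss(cls_scores, bbox_preds, gt_bboxes, img_meta, cfg,
             gt_bboxes_ignore=None):
        return dict(loss_rpn_cls=0.0, loss_rpn_bbox=0.0)

    rpn_outs = ('cls_scores', 'bbox_preds')  # stand-in for the head's output tuple
    rpn_loss_inputs = rpn_outs + ('gt_bboxes', 'img_meta', 'train_cfg.rpn')
    rpn_losses = loss(*rpn_loss_inputs, gt_bboxes_ignore=None)
    print(rpn_losses)  # {'loss_rpn_cls': 0.0, 'loss_rpn_bbox': 0.0}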