Example #1
    def preprocess_image(self, batched_inputs):
        """
        Pad and batch the input images.
        """
        images = [x["image"].to(self.device) for x in batched_inputs]

        images = ImageList.from_tensors(images)
        return images
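
The snippets in this listing all funnel their per-image tensors through ImageList.from_tensors. A minimal standalone sketch of that call (assuming torch and detectron2 are installed; the shapes are illustrative) shows what it returns: one padded batch tensor plus the original per-image sizes.

import torch
from detectron2.structures import ImageList

# two images of different sizes are zero-padded into a single batch tensor
imgs = [torch.rand(3, 480, 640), torch.rand(3, 512, 512)]
batch = ImageList.from_tensors(imgs, size_divisibility=32)
print(batch.tensor.shape)   # torch.Size([2, 3, 512, 640])
print(batch.image_sizes)    # [(480, 640), (512, 512)]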
Example #2
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
                Each item in the list contains the inputs for one image.

        For now, each item in the list is a dict that contains:
            image: Tensor, image in (C, H, W) format.
            sem_seg: semantic segmentation ground truth
            Other information that's included in the original dicts, such as:
                "height", "width" (int): the output resolution of the model, used in inference.
                    See :meth:`postprocess` for details.

        Returns:
            list[dict]: Each dict is the output for one input image.
                The dict contains one key "sem_seg" whose value is a
                Tensor of the output resolution that represents the
                per-pixel segmentation prediction.
        """
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [self.normalizer(x) for x in images]
        images = ImageList.from_tensors(images,
                                        self.backbone.size_divisibility)

        features = self.backbone(images.tensor)

        if "sem_seg" in batched_inputs[0]:
            targets = [x["sem_seg"].to(self.device) for x in batched_inputs]
            targets = ImageList.from_tensors(
                targets, self.backbone.size_divisibility,
                self.sem_seg_head.ignore_value).tensor
        else:
            targets = None
        results, losses = self.sem_seg_head(features, targets)

        if self.training:
            return losses

        processed_results = []
        for result, input_per_image, image_size in zip(results, batched_inputs,
                                                       images.image_sizes):
            height = input_per_image.get("height")
            width = input_per_image.get("width")
            r = sem_seg_postprocess(result, image_size, height, width)
            processed_results.append({"sem_seg": r})
        return processed_results
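
Example #2 maps each prediction back to the caller-requested resolution with sem_seg_postprocess. A minimal standalone sketch of that helper (assuming detectron2 is installed; the class count and sizes are illustrative): it crops away the padding recorded by ImageList and interpolates to the requested height and width.

import torch
from detectron2.modeling.postprocessing import sem_seg_postprocess

logits = torch.rand(19, 512, 640)                # (num_classes, padded H, padded W)
out = sem_seg_postprocess(logits, (480, 640), 720, 960)
print(out.shape)                                 # torch.Size([19, 720, 960])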
Example #3
 def preprocess_flow(self, batched_inputs):
     """
     Normalize, pad and batch the target flow.
     """
     flows = [x["flow_map"].to(self.device) for x in batched_inputs]
     flows = [x / self.flow_div for x in flows]
     flows = ImageList.from_tensors(flows).tensor
     return flows
Example #4
 def preprocess_image(self, batched_inputs):
     """
     Normalize, pad and batch the input images.
     """
     images = [x["image"].to(self.device) for x in batched_inputs]
     images = [(x - self.pixel_mean) / self.pixel_std for x in images]
     images = ImageList.from_tensors(images, self.backbone.size_divisibility)
     return images
Example #5
 def preprocess_batchedimages(self, batched_inputs):
     """
     Preprocess batch: normalize, resize and pad into a uniform batch.
     """
     images = [x["image"].to(self.device) for x in batched_inputs]
     images = [self.normalizer(x) for x in images]
     images = ImageList.from_tensors(images, self.backbone.size_divisibility)
     return images
Example #6
 def preprocess_image(self, inputs):
     data, im_info = inputs
     print(data.size())
     data = alias(data, "data")
     mean, std = self._wrapped_model.pixel_mean, self._wrapped_model.pixel_std
     images = (data - mean) / std
     images = ImageList(tensor=images, image_sizes=im_info)
     return images
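
Example #6 builds the ImageList directly from an already-padded tensor and the per-image sizes, instead of calling from_tensors. A minimal sketch of that constructor (the shapes are illustrative):

import torch
from detectron2.structures import ImageList

padded = torch.rand(2, 3, 512, 640)              # batch is assumed to be padded already
il = ImageList(tensor=padded, image_sizes=[(480, 640), (512, 512)])
print(len(il), il[0].shape)                      # 2 torch.Size([3, 480, 640])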
Example #7
 def preprocess_image(self, batched_inputs):
     """normalize, pad and batch the input images"""
     images = [x["image"].to(self.device) for x in batched_inputs]
     images = [self.normalizer(x) for x in images]
     images = ImageList.from_tensors(
         images, self.backbone.size_divisibility
     )
     return images
Example #8
    def forward(self, batched_inputs: Tuple[Dict[str, torch.Tensor]]):
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [(x - self.pixel_mean) / self.pixel_std for x in images]
        images = ImageList.from_tensors(images, size_divisibility=self.size_divisibility).tensor
        metas = []
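        # all inputs must agree on whether an original 'height'/'width' is provided; the set below collapses to one flag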
        rescale = {"height" in x for x in batched_inputs}
        if len(rescale) != 1:
            raise ValueError("Some inputs have original height/width, but some don't!")
        rescale = list(rescale)[0]
        output_shapes = []
        for input in batched_inputs:
            meta = {}
            c, h, w = input["image"].shape
            meta["img_shape"] = meta["ori_shape"] = (h, w, c)
            if rescale:
                scale_factor = np.sqrt(h * w / (input["height"] * input["width"]))
                ori_shape = (input["height"], input["width"])
                output_shapes.append(ori_shape)
                meta["ori_shape"] = ori_shape + (c,)
            else:
                scale_factor = 1.0
                output_shapes.append((h, w))
            meta["scale_factor"] = scale_factor
            meta["flip"] = False
            padh, padw = images.shape[-2:]
            meta["pad_shape"] = (padh, padw, c)
            metas.append(meta)

        if self.training:
            gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
            if gt_instances[0].has("gt_masks"):
                from mmdet.core import PolygonMasks as mm_PolygonMasks, BitmapMasks as mm_BitMasks

                def convert_mask(m, shape):
                    # mmdet mask format
                    if isinstance(m, BitMasks):
                        return mm_BitMasks(m.tensor.cpu().numpy(), shape[0], shape[1])
                    else:
                        return mm_PolygonMasks(m.polygons, shape[0], shape[1])

                gt_masks = [convert_mask(x.gt_masks, x.image_size) for x in gt_instances]
            else:
                gt_masks = None
            losses_and_metrics = self.detector.forward_train(
                images,
                metas,
                [x.gt_boxes.tensor for x in gt_instances],
                [x.gt_classes for x in gt_instances],
                gt_masks=gt_masks,
            )
            return _parse_losses(losses_and_metrics)
        else:
            results = self.detector.simple_test(images, metas, rescale=rescale)
            results = [
                {"instances": _convert_mmdet_result(r, shape)}
                for r, shape in zip(results, output_shapes)
            ]
            return results
Example #9
    def test_roi_heads(self):
        torch.manual_seed(121)
        cfg = get_cfg()
        cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
        cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
        cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2"
        cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5)
        cfg.MODEL.MASK_ON = True
        num_images = 2
        images_tensor = torch.rand(num_images, 20, 30)
        image_sizes = [(10, 10), (20, 30)]
        images = ImageList(images_tensor, image_sizes)
        num_channels = 1024
        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
        feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)}

        image_shape = (15, 15)
        gt_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]],
                                 dtype=torch.float32)
        gt_instance0 = Instances(image_shape)
        gt_instance0.gt_boxes = Boxes(gt_boxes0)
        gt_instance0.gt_classes = torch.tensor([2, 1])
        gt_instance0.gt_masks = BitMasks(torch.rand((2, ) + image_shape) > 0.5)
        gt_boxes1 = torch.tensor([[1, 5, 2, 8], [7, 3, 10, 5]],
                                 dtype=torch.float32)
        gt_instance1 = Instances(image_shape)
        gt_instance1.gt_boxes = Boxes(gt_boxes1)
        gt_instance1.gt_classes = torch.tensor([1, 2])
        gt_instance1.gt_masks = BitMasks(torch.rand((2, ) + image_shape) > 0.5)
        gt_instances = [gt_instance0, gt_instance1]

        proposal_generator = build_proposal_generator(cfg, feature_shape)
        roi_heads = StandardROIHeads(cfg, feature_shape)

        with EventStorage():  # capture events in a new storage to discard them
            proposals, proposal_losses = proposal_generator(
                images, features, gt_instances)
            _, detector_losses = roi_heads(images, features, proposals,
                                           gt_instances)

        detector_losses.update(proposal_losses)
        expected_losses = {
            "loss_cls": 4.5253729820251465,
            "loss_box_reg": 0.009785720147192478,
            "loss_mask": 0.693184494972229,
            "loss_rpn_cls": 0.08186662942171097,
            "loss_rpn_loc": 0.1104838103055954,
        }
        succ = all(
            torch.allclose(detector_losses[name],
                           torch.tensor(expected_losses.get(name, 0.0)))
            for name in detector_losses.keys())
        self.assertTrue(
            succ,
            "Losses has changed! New losses: {}".format(
                {k: v.item()
                 for k, v in detector_losses.items()}),
        )
Example #10
    def test_StandardROIHeads_scriptability(self):
        cfg = get_cfg()
        cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
        cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
        cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2"
        cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5)
        cfg.MODEL.MASK_ON = True
        cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.01
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.01
        num_images = 2
        images_tensor = torch.rand(num_images, 20, 30)
        image_sizes = [(10, 10), (20, 30)]
        images = ImageList(images_tensor, image_sizes)
        num_channels = 1024
        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
        feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)}

        roi_heads = StandardROIHeads(cfg, feature_shape).eval()

        proposal0 = Instances(image_sizes[0])
        proposal_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]],
                                       dtype=torch.float32)
        proposal0.proposal_boxes = Boxes(proposal_boxes0)
        proposal0.objectness_logits = torch.tensor([0.5, 0.7],
                                                   dtype=torch.float32)

        proposal1 = Instances(image_sizes[1])
        proposal_boxes1 = torch.tensor([[1, 5, 2, 8], [7, 3, 10, 5]],
                                       dtype=torch.float32)
        proposal1.proposal_boxes = Boxes(proposal_boxes1)
        proposal1.objectness_logits = torch.tensor([0.1, 0.9],
                                                   dtype=torch.float32)
        proposals = [proposal0, proposal1]

        pred_instances, _ = roi_heads(images, features, proposals)
        fields = {
            "objectness_logits": torch.Tensor,
            "proposal_boxes": Boxes,
            "pred_classes": torch.Tensor,
            "scores": torch.Tensor,
            "pred_masks": torch.Tensor,
            "pred_boxes": Boxes,
            "pred_keypoints": torch.Tensor,
            "pred_keypoint_heatmaps": torch.Tensor,
        }
        with patch_instances(fields) as new_instances:
            proposal0 = new_instances.from_instances(proposal0)
            proposal1 = new_instances.from_instances(proposal1)
            proposals = [proposal0, proposal1]
            scripted_rot_heads = torch.jit.script(roi_heads)
            scripted_pred_instances, _ = scripted_rot_heads(
                images, features, proposals)

        for instance, scripted_instance in zip(pred_instances,
                                               scripted_pred_instances):
            assert_instances_allclose(instance,
                                      scripted_instance.to_instances(),
                                      rtol=0)
Example #11
    def preprocess_image(self, batched_inputs):
        # all models from detectron2 preprocess the images the same way
        # this could change in the future; fingers crossed
        # reference: https://github.com/facebookresearch/detectron2/tree/master/detectron2/modeling/meta_arch  # last checked: 23.11.20

        images = [x["image"].to(self.model.device) for x in batched_inputs]
        images = [(x - self.model.pixel_mean) / self.model.pixel_std for x in images]
        images = ImageList.from_tensors(images, self.model.backbone.size_divisibility)
        return images
Example #12
 def _preprocess_image(self, batched_inputs):
     """
     Pad and batch the input images, and collect the per-image labels.
     """
     images = [x["image"].to(self.device) for x in batched_inputs]
     labels = torch.LongTensor([x["label"]
                                for x in batched_inputs]).to(self.device)
     images = ImageList.from_tensors(images)
     return images, labels
Example #13
 def preprocess_image(self, batched_inputs):
     """
     Normalize, pad and batch the input images.
     """
     images = [x.to(self.device) for x in batched_inputs]
     norms = [self.normalizer(x) for x in images]
     size = (norms[0].shape[1], norms[0].shape[2])
     images = ImageList.from_tensors(norms, self.backbone.size_divisibility)
     return images, size
Example #14
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
                Each item in the list contains the inputs for one image.

                For now, each item in the list is a dict that contains:

                * "image": Tensor, image in (C, H, W) format.
                * "instances": Instances
                * "sem_seg": semantic segmentation ground truth.
                * Other information that's included in the original dicts, such as:
                  "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.

        Returns:
            list[dict]:
                each dict has the results for one image. The dict contains the following keys:

                * "instances": see :meth:`GeneralizedRCNN.forward` for its format.
                * "sem_seg": see :meth:`SemanticSegmentor.forward` for its format.
                * "panoptic_seg": See the return value of
                  :func:`combine_semantic_and_instance_outputs` for its format.
        """
        if not self.training:
            return self.inference(batched_inputs)
        images = self.preprocess_image(batched_inputs)
        features = self.backbone(images.tensor)

        assert "sem_seg" in batched_inputs[0]
        gt_sem_seg = [x["sem_seg"].to(self.device) for x in batched_inputs]
        gt_sem_seg = ImageList.from_tensors(
            gt_sem_seg, self.backbone.size_divisibility, self.sem_seg_head.ignore_value
        ).tensor
        sem_seg_results, sem_seg_losses, feat, seg_score = self.sem_seg_head(features, gt_sem_seg)
        #
        del sem_seg_results
        #
        gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
        proposals, proposal_losses = self.proposal_generator(images, features, gt_instances)
        detector_results, detector_losses, box_features = self.roi_heads(
            images, features, proposals, gt_instances
        )

        #############################
        # Graph op
        #############################
        instance, sem_seg_results, losses_graph = self.graph_connection(
            box_features, detector_results, features,
            feat, seg_score, gt_sem_seg,
        )

        losses = sem_seg_losses
        losses.update(proposal_losses)
        losses.update(detector_losses)
        losses.update(losses_graph)
        return losses
Example #15
 def preprocess_image(self, batched_inputs):
     """
     Normalize, pad and batch the input images.
     """
     images = [x["image"].to(self.device) for x in batched_inputs]
     image_scales = [x["im_scale"] for x in batched_inputs]
     images = ImageList.from_tensors(images, self.backbone.size_divisibility)
     images.image_scales = image_scales
     return images
Example #16
 def preprocess_image(self, batched_inputs: Tuple[Dict[str, Tensor]]):
     '''
         Normalize and batch the input images.
     '''
     images = [x["image"].to(self.device) for x in batched_inputs]
      # convert uint8 images to float in [0, 1]; keep already-float images instead of dropping them
      images = [x.float().div(255) if x.dtype != torch.float else x for x in images]
     images = [(x - self.pixel_mean) / self.pixel_std for x in images]
     images = ImageList.from_tensors(images)
     return images
Example #17
def repad_image_list(
    il: "ImageList", pad_value: float = 0.0, inplace: bool = True
) -> "ImageList":
    if not inplace:
        il = ImageList(il.tensor.clone().detach(), copy.deepcopy(il.image_sizes))
    for i in range(len(il.image_sizes)):
        h, w = il.image_sizes[i]
        il.tensor[i, ..., h:, :] = pad_value
        il.tensor[i, ..., :, w:] = pad_value
    return il
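
A hypothetical usage of the repad_image_list helper above (sizes are illustrative): after in-place arithmetic on il.tensor the padded border no longer holds the pad value, and the helper writes it back.

import torch
from detectron2.structures import ImageList

il = ImageList.from_tensors([torch.rand(3, 100, 120), torch.rand(3, 120, 100)])
il.tensor += 1.0                          # the zero padding is overwritten
il = repad_image_list(il, pad_value=0.0)  # restore the pad value in place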
Example #18
 def preprocess_image(self, batched_inputs):
     """
     Normalize, pad and batch the input images.
     """
     images = [x["image"].to(self.device) for x in batched_inputs]
     images = [(x - self.pixel_mean) / self.pixel_std for x in images]
     if self.dynamic:
         images = ImageList.from_tensors(images, self.backbone.size_divisibility)
     else:
         if self.training:
             min_size = self.input.MIN_SIZE_TRAIN
             max_size = self.input.MAX_SIZE_TRAIN
         else:
             min_size = self.input.MIN_SIZE_TEST
             max_size = self.input.MAX_SIZE_TEST
         min_size = min_size[0] if isinstance(min_size, tuple) else min_size
         images = ImageList.from_tensors(images, self.backbone.size_divisibility,
                                         max_height=min_size, max_width=max_size)
     return images
Example #19
 def preprocess_image(self, batched_inputs, norm=True):
     """
     Normalize, pad and batch the input images.
     """
     images = [x["image"].to(self.device) for x in batched_inputs]
     if norm:
         images = [self.normalizer(x) for x in images]
     images = ImageList.from_tensors(images, 512)
     images = images.to(self.device)
     return images
Example #20
    def test_rroi_heads(self):
        torch.manual_seed(121)
        cfg = get_cfg()
        cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
        cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
        cfg.MODEL.ROI_HEADS.NAME = "RROIHeads"
        cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
        cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
        cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
        cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
        cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated"
        cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)
        num_images = 2
        images_tensor = torch.rand(num_images, 20, 30)
        image_sizes = [(10, 10), (20, 30)]
        images = ImageList(images_tensor, image_sizes)
        num_channels = 1024
        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
        feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)}

        image_shape = (15, 15)
        gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]], dtype=torch.float32)
        gt_instance0 = Instances(image_shape)
        gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0)
        gt_instance0.gt_classes = torch.tensor([2, 1])
        gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]], dtype=torch.float32)
        gt_instance1 = Instances(image_shape)
        gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1)
        gt_instance1.gt_classes = torch.tensor([1, 2])
        gt_instances = [gt_instance0, gt_instance1]

        proposal_generator = build_proposal_generator(cfg, feature_shape)
        roi_heads = build_roi_heads(cfg, feature_shape)

        with EventStorage():  # capture events in a new storage to discard them
            proposals, proposal_losses = proposal_generator(images, features, gt_instances)
            _, detector_losses = roi_heads(images, features, proposals, gt_instances)

        detector_losses.update(proposal_losses)
        expected_losses = {
            "loss_cls": 4.365657806396484,
            "loss_box_reg": 0.0015851043863222003,
            "loss_rpn_cls": 0.2427729219198227,
            "loss_rpn_loc": 0.3646621108055115,
        }
        succ = all(
            torch.allclose(detector_losses[name], torch.tensor(expected_losses.get(name, 0.0)))
            for name in detector_losses.keys()
        )
        self.assertTrue(
            succ,
            "Losses has changed! New losses: {}".format(
                {k: v.item() for k, v in detector_losses.items()}
            ),
        )
Example #21
    def forward(self, batched_inputs):
        # complete image
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [self.normalizer(x) for x in images]
        images = ImageList.from_tensors(images,
                                        self.inpaint_net.size_divisibility)
        # triplet input maps:
        # erased regions
        masks = [x["mask"].to(self.device) for x in batched_inputs]
        masks = ImageList.from_tensors(masks,
                                       self.inpaint_net.size_divisibility)
        # mask the input image with masks
        erased_ims = images.tensor * (1. - masks.tensor)
        # ones map
        ones_ims = [
            torch.ones_like(x["mask"].to(self.device)) for x in batched_inputs
        ]
        ones_ims = ImageList.from_tensors(ones_ims,
                                          self.inpaint_net.size_divisibility)
        # the conv layers use zero padding; the ones map is used to indicate the image boundary

        # generation process
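        # the generator input is the erased image, the ones map and the mask concatenated along the channel dimension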
        input_tensor = torch.cat([erased_ims, ones_ims.tensor, masks.tensor],
                                 dim=1)
        coarse_inp, fine_inp, offset_flow = self.inpaint_net(
            input_tensor, masks.tensor)
        # offset_flow is used to visualize

        if self.training:
            raise NotImplementedError
        else:
            processed_results = []
            inpainted_im = erased_ims * (
                1. - masks.tensor) + fine_inp * masks.tensor
            for result, input_per_image, image_size in zip(
                    inpainted_im, batched_inputs, images.image_sizes):
                height = input_per_image.get("height")
                width = input_per_image.get("width")
                r = sem_seg_postprocess(result, image_size, height, width)
                # repurpose the semantic segmentation postprocess; it simply resizes back to the original resolution
                processed_results.append({"inpainted": r})
            return processed_results
Example #22
def gather_instance_to_global_mask(pred_mask_logits, instances):
    cls_agnostic_mask = pred_mask_logits.size(1) == 1
    mask_size = pred_mask_logits.size()[-2:]
    assert cls_agnostic_mask

    gt_masks = []
    for ins_per_im in instances:
        # assert mask_size == ins_per_im.gt_masks.image_size
        gt_masks.append(ins_per_im.gt_masks.tensor.any(0)[None] != 0)
    # FIXME replace hard coded 32
    return ImageList.from_tensors(gt_masks, 32).tensor
Example #23
    def load(self, filenames: ManyPaths) -> ImagesWithSize:
        images = [{"file_name": str(f)} for f in filenames]
        images = [self.mapper(i) for i in images]

        from detectron2.structures import ImageList

        images = ImageList.from_tensors([image["image"] for image in images])

        return (images.tensor.float() / 256,
                torch.tensor(images.image_sizes,
                             dtype=torch.int32))  # type: ignore
Example #24
    def forward(self, inputs):
        if not self.training:
            return self.inference(inputs)

        images = [x["image"] for x in inputs]
        images = ImageList.from_tensors(images, 1)
        ret = self.conv(images.tensor)
        ret = self.bn(ret)
        ret = self.relu(ret)
        ret = self.avgpool(ret)
        return {"loss": ret.norm()}
Example #25
 def preprocess_image(self, batched_inputs: List[Dict[str, Tensor]]):
     """
     Normalize, pad and batch the input images.
     """
     images = [
         self._move_to_current_device(x["image"]) for x in batched_inputs
     ]
     images = [(x - self.pixel_mean) / self.pixel_std for x in images]
     images = ImageList.from_tensors(images,
                                     self.backbone.size_divisibility)
     return images
Example #26
    def forward(self, images, features, gt_instances=None):
        """
        See :class:`RPN.forward`.
        """
        num_branch = self.num_branch if self.training or not self.trident_fast else 1
        # Duplicate images and gt_instances for all branches in TridentNet.
        all_images = ImageList(torch.cat([images.tensor] * num_branch),
                               images.image_sizes * num_branch)
        all_gt_instances = gt_instances * num_branch if gt_instances is not None else None

        return super(TridentRPN, self).forward(all_images, features,
                                               all_gt_instances)
Example #27
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
            Each item in the list contains the inputs for one image.

        For now, each item in the list is a dict that contains:
        * "image": Tensor, image in (C, H, W) format.
        * "instances": Instances
        * "sem_seg": semantic segmentation ground truth.
        * Other information that's included in the original dicts, such as:
            "height", "width" (int): the output resolution of the model, used in inference.

        Returns:
            list[dict]:
                each dict has the results for one image. The dict contains the following keys:
                * "instances": Instances results.
                * "sem_seg": Semantic Segmentation results.
                * "panoptic_seg": available when `MODEL.INFERENCE.COMBINE.ENABLE`.
                  See the return value of
                  :func:`combine_thing_and_stuff` for its format.
        """
        if self.export_onnx:
            print('[WARN] exporting onnx...')
            assert isinstance(batched_inputs, torch.Tensor) or isinstance(
                batched_inputs, list), 'onnx export, batched_inputs only needs image tensor'
            images = batched_inputs
        else:
            images = [x["image"].to(self.device) for x in batched_inputs]
            images = [self.normalizer(x) for x in images]
            images = ImageList.from_tensors(
                images, self.backbone.size_divisibility)

        if self.export_onnx:
            features = self.backbone(images)
        else:
            features = self.backbone(images.tensor)
        encode_feat = self.semantic_fpn(features)
        encode_feat = self.feature_encoder(encode_feat)
        features_in = [features[_feat] for _feat in self.in_feature]
        pred_centers, pred_regions, pred_weights = multi_apply(
            self.forward_single_level, features_in)

        if self.training:
            gt_dict = self.get_ground_truth.generate(
                batched_inputs, images, pred_weights, encode_feat)
            return self.losses(pred_centers, pred_regions, pred_weights, encode_feat, gt_dict)
        else:
            if self.export_onnx:
                # return pred_centers, pred_regions, pred_weights
                return self.inference_onnx(batched_inputs, images, pred_centers, pred_regions, pred_weights, encode_feat)
            else:
                return self.inference(batched_inputs, images, pred_centers, pred_regions, pred_weights, encode_feat)
Example #28
            def forward(self, inputs):
                images = [x["image"] for x in inputs]
                images = ImageList.from_tensors(images, 1)

                ret = self.conv(images.tensor)
                losses = {"loss": ret.norm()}

                # run the same conv again
                ret1 = self.conv(images.tensor)
                losses["ret1"] = ret1.norm()

                return losses
Example #29
    def preprocess_image(self, batched_inputs):
        """
        Normalize, pad and batch the input images.
        """
        images = [x["image"].float().to(self.device) for x in batched_inputs]
        # print(images)
        # assert False
        images = [self.normalizer(x) for x in images]

        images = ImageList.from_tensors(images,
                                        self.backbone.size_divisibility)
        return images
    def test_rroi_heads(self):
        torch.manual_seed(121)
        cfg = get_cfg()
        cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
        cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
        cfg.MODEL.ROI_HEADS.NAME = "RROIHeads"
        cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
        cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
        cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
        cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
        cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated"
        cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)
        backbone = build_backbone(cfg)
        num_images = 2
        images_tensor = torch.rand(num_images, 20, 30)
        image_sizes = [(10, 10), (20, 30)]
        images = ImageList(images_tensor, image_sizes)
        num_channels = 1024
        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}

        image_shape = (15, 15)
        gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]],
                                 dtype=torch.float32)
        gt_instance0 = Instances(image_shape)
        gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0)
        gt_instance0.gt_classes = torch.tensor([2, 1])
        gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]],
                                 dtype=torch.float32)
        gt_instance1 = Instances(image_shape)
        gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1)
        gt_instance1.gt_classes = torch.tensor([1, 2])
        gt_instances = [gt_instance0, gt_instance1]

        proposal_generator = build_proposal_generator(cfg,
                                                      backbone.output_shape())
        roi_heads = build_roi_heads(cfg, backbone.output_shape())

        with EventStorage():  # capture events in a new storage to discard them
            proposals, proposal_losses = proposal_generator(
                images, features, gt_instances)
            _, detector_losses = roi_heads(images, features, proposals,
                                           gt_instances)

        expected_losses = {
            "loss_cls": torch.tensor(4.381618499755859),
            "loss_box_reg": torch.tensor(0.0011829272843897343),
        }
        for name in expected_losses.keys():
            err_msg = "detector_losses[{}] = {}, expected losses = {}".format(
                name, detector_losses[name], expected_losses[name])
            self.assertTrue(
                torch.allclose(detector_losses[name], expected_losses[name]),
                err_msg)