Example #1
 def _postprocess(self, instances, proposals, batched_inputs, image_sizes):
     """
     Rescale the output instances to the target size.
     """
     # note: private function; subject to changes
     processed_results = []
     for results_per_image, proposal_per_image, input_per_image, image_size in zip(
             instances, proposals, batched_inputs, image_sizes
     ):
         height = input_per_image.get("height", image_size[0])
         width = input_per_image.get("width", image_size[1])
         r = detector_postprocess(results_per_image, height, width)
         processed_results.append(
             {"instances": r, "proposals": detector_postprocess(proposal_per_image, height, width)})
     return processed_results
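Most examples on this page follow the pattern above: read the caller's requested output resolution from the input dict, fall back to the network's image size, and hand both to `detector_postprocess`. A minimal standalone sketch of that call, assuming only that detectron2 is installed and using synthetic `Instances` data:

import torch
from detectron2.structures import Boxes, Instances
from detectron2.modeling.postprocessing import detector_postprocess

# Synthetic detections on an 800x1216 resized/padded network input.
result = Instances((800, 1216))
result.pred_boxes = Boxes(torch.tensor([[10.0, 10.0, 200.0, 300.0]]))
result.scores = torch.tensor([0.9])
result.pred_classes = torch.tensor([0])

# Rescale boxes (and masks/keypoints, if present) back to the original 400x608 image.
r = detector_postprocess(result, 400, 608)
print(r.image_size, r.pred_boxes.tensor)  # (400, 608), boxes scaled by 0.5 -> [[5., 5., 100., 150.]]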
Example #2
    def __call__(self, inputs, tensor_inputs, tensor_outputs):
        results_per_image = self.outputs_schema(tensor_outputs)

        assert len(inputs) == 1, "only support single batch"
        width, height = inputs[0]["width"], inputs[0]["height"]
        r = detector_postprocess(results_per_image, height, width)
        return [{"instances": r}]
Example #3
    def visualize_training(self, batched_inputs, results):
        """
        A function used to visualize ground truth images and final network predictions.
        It shows ground truth bounding boxes on the original image and up to 20
        predicted object bounding boxes on the original image.

        Args:
            batched_inputs (list): a list that contains input to the model.
            results (List[Instances]): a list of #images elements.
        """
        from detectron2.utils.visualizer import Visualizer

        assert len(batched_inputs) == len(
            results
        ), "Cannot visualize inputs and results of different sizes"
        storage = get_event_storage()
        max_boxes = 20

        image_index = 0  # only visualize a single image
        img = batched_inputs[image_index]["image"]
        img = convert_image_to_rgb(img.permute(1, 2, 0), self.input_format)
        v_gt = Visualizer(img, None)
        v_gt = v_gt.overlay_instances(boxes=batched_inputs[image_index]["instances"].gt_boxes)
        anno_img = v_gt.get_image()
        processed_results = detector_postprocess(results[image_index], img.shape[0], img.shape[1])
        predicted_boxes = processed_results.pred_boxes.tensor.detach().cpu().numpy()

        v_pred = Visualizer(img, None)
        v_pred = v_pred.overlay_instances(boxes=predicted_boxes[0:max_boxes])
        prop_img = v_pred.get_image()
        vis_img = np.vstack((anno_img, prop_img))
        vis_img = vis_img.transpose(2, 0, 1)
        vis_name = f"Top: GT bounding boxes; Bottom: {max_boxes} Highest Scoring Results"
        storage.put_image(vis_name, vis_img)
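For context, `storage.put_image` belongs to detectron2's `EventStorage`, which the training loop normally enters before `visualize_training` is called. A hedged sketch of that logging call in isolation, with a synthetic CHW image standing in for `vis_img`:

import numpy as np
from detectron2.utils.events import EventStorage

# CHW, RGB, uint8 values in [0, 255], as produced by the vstack/transpose above.
vis_img = np.random.randint(0, 255, size=(3, 100, 200), dtype=np.uint8)
with EventStorage(start_iter=0) as storage:
    storage.put_image("Top: GT bounding boxes; Bottom: 20 Highest Scoring Results", vis_img)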
Example #4
 def postprocess(instances, batched_inputs):
     processed_results = []
     for results_per_image, input_per_image in zip(instances,
                                                   batched_inputs):
         # note: the output resolution is hard-coded to 300x300 here
         r = detector_postprocess(results_per_image.to('cpu'), 300, 300)
         processed_results.append({"instances": r})
     return processed_results
Example #5
        def f(batched_inputs, c2_inputs, c2_results):
            image_sizes = [[int(im[0]), int(im[1])] for im in c2_inputs["im_info"]]
            detector_results = assemble_rcnn_outputs_by_name(
                image_sizes, c2_results, force_mask_on=True
            )
            sem_seg_results = c2_results["sem_seg"]

            # copied from meta_arch/panoptic_fpn.py ...
            processed_results = []
            for sem_seg_result, detector_result, input_per_image, image_size in zip(
                sem_seg_results, detector_results, batched_inputs, image_sizes
            ):
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                sem_seg_r = sem_seg_postprocess(sem_seg_result, image_size, height, width)
                detector_r = detector_postprocess(detector_result, height, width)

                processed_results.append({"sem_seg": sem_seg_r, "instances": detector_r})

                if combine_on:
                    panoptic_r = combine_semantic_and_instance_outputs(
                        detector_r,
                        sem_seg_r.argmax(dim=0),
                        combine_overlap_threshold,
                        combine_stuff_area_limit,
                        combine_instances_confidence_threshold,
                    )
                    processed_results[-1]["panoptic_seg"] = panoptic_r
            return processed_results
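`sem_seg_postprocess` is the semantic-segmentation counterpart used above: it crops away the padded region and resizes the logits to the requested resolution. A small sketch with a synthetic logit tensor (detectron2 assumed; the sizes are illustrative):

import torch
from detectron2.modeling.postprocessing import sem_seg_postprocess

sem_seg_logits = torch.randn(54, 800, 1216)   # (C, H, W) prediction on the padded input
valid_size = (800, 1200)                      # image size inside the padding
out = sem_seg_postprocess(sem_seg_logits, valid_size, 480, 640)
print(out.shape)  # torch.Size([54, 480, 640])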
Example #6
 def postprocess(self, outputs, images, image_ids, to_cpu):
     frames = []
     for instances, image, image_id in zip(outputs, images, image_ids):
         height, width = image.shape[:2]
         instances = detector_postprocess(instances, height, width)
         type_valid = [
             self.model_meta.thing_classes[pred_class] in TYPE_MAPPING
             for pred_class in instances.pred_classes]
         instances = instances[type_valid]
         instances.pred_classes = torch.as_tensor([
             TYPE_MAPPING[self.model_meta.thing_classes[pred_class]]
             for pred_class in instances.pred_classes])
         if len(instances) > 0:
             nms_mapping = torch.as_tensor([
                 NMS_MAPPING[pred_class.item()]
                 for pred_class in instances.pred_classes],
                 dtype=torch.int, device=self.device)
             nms_types = nms_mapping[:, 0]
             nms_scores = instances.scores + nms_mapping[:, 1]
             keep_indices = batched_nms(
                 instances.pred_boxes.tensor, nms_scores, nms_types,
                 self.nms_threshold)
             instances = instances[keep_indices]
         features = instances.roi_features.mean(dim=(2, 3))
         features = features / features.norm(dim=1, keepdim=True)
         instances.roi_features = features
         if to_cpu:
             instances = instances.to('cpu')
         frame = Frame(image_id, image, instances)
         frames.append(frame)
     return frames
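The `NMS_MAPPING` step above runs NMS over groups of classes rather than per class: boxes only suppress each other when they share a group index, and the per-class score offsets bias which box wins inside a group. A tiny sketch of the grouping behaviour, assuming the example's `batched_nms` behaves like torchvision's:

import torch
from torchvision.ops import batched_nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [0., 0., 10., 10.]])
scores = torch.tensor([0.9, 0.8, 0.7])
groups = torch.tensor([0, 0, 1])          # third box belongs to another NMS group
keep = batched_nms(boxes, scores, groups, iou_threshold=0.5)
print(keep)  # tensor([0, 2]): box 1 is suppressed by box 0; box 2 survives in its own group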
Example #7
    def assemble(self, batched_inputs, c2_inputs, c2_results):
        c2_results = {k: torch.Tensor(v) for k, v in c2_results.items()}

        image_sizes = [[int(im[0]), int(im[1])] for im in c2_inputs["im_info"]]

        num_features = len(
            [x for x in c2_results.keys() if x.startswith("box_cls_")])
        box_cls = [
            c2_results["box_cls_{}".format(i)] for i in range(num_features)
        ]
        box_delta = [
            c2_results["box_delta_{}".format(i)] for i in range(num_features)
        ]

        # For each feature level, the dummy feature should have the same batch size
        # and spatial dimensions as box_cls and box_delta.
        dummy_features = [
            box_delta[i].clone()[:, 0:0, :, :] for i in range(num_features)
        ]
        anchors = self.anchor_generator(dummy_features)

        # self.num_classes can be inferred from the head output shapes
        self.num_classes = box_cls[0].shape[1] // (box_delta[0].shape[1] // 4)

        results = self.inference(box_cls, box_delta, anchors, image_sizes)
        processed_results = []
        for results_per_image, input_per_image, image_size in zip(
                results, batched_inputs, image_sizes):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            r = detector_postprocess(results_per_image, height, width)
            processed_results.append({"instances": r})
        return processed_results
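The `num_classes` line above is plain channel arithmetic: a RetinaNet-style head predicts `A*K` classification channels and `A*4` box-delta channels per level, so dividing one by the other recovers `K`. A worked sketch with illustrative numbers:

A, K = 9, 80                              # anchors per location, number of classes
box_cls_channels = A * K                  # 720 channels in box_cls_0
box_delta_channels = A * 4                # 36 channels in box_delta_0
num_classes = box_cls_channels // (box_delta_channels // 4)
assert num_classes == K                   # 720 // 9 == 80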
Example #8
    def det_inference(
        self, batched_inputs, detected_instances=None, do_postprocess=True
    ):
        """
        Run inference on the given inputs.

        Args:
            batched_inputs (list[dict]): same as in :meth:`forward`
            detected_instances (None or list[Instances]): if not None, it
                contains an `Instances` object per image. The `Instances`
                object contains "pred_boxes" and "pred_classes" which are
                known boxes in the image.
                The inference will then skip the detection of bounding boxes,
                and only predict other per-ROI outputs.
            do_postprocess (bool): whether to apply post-processing on the outputs.

        Returns:
            same as in :meth:`forward`.
        """
        assert not self.training

        images = self.preprocess_image(batched_inputs)
        features = self.backbone(images.tensor)

        if detected_instances is None:
            if self.proposal_generator:
                proposals, _ = self.proposal_generator(images, features, None)
            else:
                assert "proposals" in batched_inputs[0]
                proposals = [
                    x["proposals"].to(self.device) for x in batched_inputs
                ]

            results, others = self.roi_heads(images, features, proposals, None)
            if isinstance(others, tuple):
                others, box_features = others

            else:
                box_features = None
        else:
            detected_instances = [
                x.to(self.device) for x in detected_instances
            ]
            results = self.roi_heads.forward_with_given_boxes(
                features, detected_instances
            )
            box_features = None

        if do_postprocess:
            processed_results = []
            for results_per_image, input_per_image, image_size in zip(
                results, batched_inputs, images.image_sizes
            ):
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                r = detector_postprocess(results_per_image, height, width)
                processed_results.append({"instances": r})
            return processed_results, box_features
        else:
            return results, box_features
Example #9
def run_detector(raw_image, predictor, num_objects=100, verbose=True):
    with torch.no_grad():
        raw_height, raw_width = raw_image.shape[:2]
        if verbose: tqdm.write("Original image size: " + str((raw_height, raw_width)))
        
        # Preprocessing
        image = predictor.transform_gen.get_transform(raw_image).apply_image(raw_image)
        if verbose: tqdm.write("Transformed image size: "+str(image.shape[:2]))
        image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
        inputs = [{"image": image, "height": raw_height, "width": raw_width}]
        images = predictor.model.preprocess_image(inputs)
        
        # Run Backbone Res1-Res4
        features = predictor.model.backbone(images.tensor)
        
        # Generate proposals with RPN
        proposals, _ = predictor.model.proposal_generator(images, features, None)
        proposal = proposals[0]
        if verbose: tqdm.write('Proposal Boxes size: ' + str(proposal.proposal_boxes.tensor.shape))
        
        # Run RoI head for each proposal (RoI Pooling + Res5)
        proposal_boxes = [x.proposal_boxes for x in proposals]
        features = [features[f] for f in predictor.model.roi_heads.in_features]
        box_features = predictor.model.roi_heads._shared_roi_transform(
            features, proposal_boxes
        )
        feature_pooled = box_features.mean(dim=[2, 3])  # pooled to 1x1
        if verbose: tqdm.write('Pooled features size: ' + str(feature_pooled.shape))
        
        # Predict classes and boxes for each proposal.
        pred_class_logits, pred_proposal_deltas = predictor.model.roi_heads.box_predictor(feature_pooled)
        outputs = FastRCNNOutputs(
            predictor.model.roi_heads.box2box_transform,
            pred_class_logits,
            pred_proposal_deltas,
            proposals,
            predictor.model.roi_heads.smooth_l1_beta,
        )
        probs = outputs.predict_probs()[0]
        boxes = outputs.predict_boxes()[0]
        
        # Note: BUTD uses raw RoI predictions,
        #       we use the predicted boxes instead.
        # boxes = proposal_boxes[0].tensor    
        
        # NMS
        for nms_thresh in np.arange(0.5, 1.0, 0.1):
            instances, ids = fast_rcnn_inference_single_image(
                boxes, probs, image.shape[1:], 
                score_thresh=0.2, nms_thresh=nms_thresh, topk_per_image=num_objects
            )
            if len(ids) == num_objects:
                break
                
        instances = detector_postprocess(instances, raw_height, raw_width)
        roi_features = feature_pooled[ids].detach()
        if verbose: tqdm.write(str(instances))
        
        return instances, roi_features
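A hedged usage sketch for `run_detector` above. The config path, weights file, and image are placeholders; `DefaultPredictor` is assumed to expose the `transform_gen` and `model` attributes the function relies on (true for the older detectron2 releases this snippet targets), and the snippet itself additionally assumes imports such as `FastRCNNOutputs` and `fast_rcnn_inference_single_image` from detectron2's fast_rcnn module:

import cv2
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file("configs/faster_rcnn_R_101_C4_caffe.yaml")  # hypothetical config
cfg.MODEL.WEIGHTS = "model_final.pkl"                           # hypothetical weights
predictor = DefaultPredictor(cfg)

raw_image = cv2.imread("example.jpg")  # HxWxC BGR, matching cfg.INPUT.FORMAT
instances, roi_features = run_detector(raw_image, predictor, num_objects=36, verbose=False)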
Example #10
 def eval_wrapper(inputs):
     """
     The exported model does not contain the final resize step, which is typically
     unused in deployment but needed for evaluation. We add it manually here.
     """
     input = inputs[0]
     instances = traceable_model.outputs_schema(ts_model(input["image"]))[0]["instances"]
     postprocessed = detector_postprocess(instances, input["height"], input["width"])
     return [{"instances": postprocessed}]
Example #11
    def forward(self, batched_inputs: Tuple[Dict[str, Tensor]]):
        """
        Args:
            batched_inputs (list): batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (C, H, W) format.
                * instances: Instances

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                    See :meth:`postprocess` for details.
        Returns:
            dict[str: Tensor]:
        """
        images = self.preprocess_image(batched_inputs)
        features = self.backbone(images.tensor)[self.head_in_features]
        features = self.upsample(features)
        pred_dict = self.head(features)

        if self.training:
            assert not torch.jit.is_scripting(), "Not supported"
            assert "instances" in batched_inputs[
                0], "Instance annotations are missing in training!"
            gt_instances = [
                x["instances"].to(self.device) for x in batched_inputs
            ]
            gt_dict = self.gt_generator(gt_instances, images.tensor.shape)

            losses = self.losses(pred_dict, gt_dict)

            if self.vis_period > 0:
                storage = get_event_storage()
                if storage.iter % self.vis_period == 0:
                    results = self.inference(pred_dict['pred_hm'],
                                             pred_dict['pred_wh'],
                                             pred_dict['pred_reg'],
                                             images.image_sizes)
                    self.visualize_training(batched_inputs, results)

            return losses
        else:
            results = self.inference(pred_dict['pred_hm'],
                                     pred_dict['pred_wh'],
                                     pred_dict['pred_reg'], images.image_sizes)
            if torch.jit.is_scripting():
                return results
            processed_results = []
            for results_per_image, input_per_image, image_size in zip(
                    results, batched_inputs, images.image_sizes):
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                r = detector_postprocess(results_per_image, height, width)
                processed_results.append({"instances": r})
            return processed_results
Example #12
 def post_processing(self, batched_inputs, results, image_sizes):
     processed_results = []
     for results_per_image, input_per_image, image_size in zip(
             results, batched_inputs, image_sizes):
         height = input_per_image.get("height", image_size[0])
         width = input_per_image.get("width", image_size[1])
         r = detector_postprocess(results_per_image, height, width)
         processed_results.append({"instances": r})
     return processed_results
Example #13
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (C, H, W) format.
                * instances: Instances

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.
        Returns:
            dict[str: Tensor]:
                mapping from a named loss to a tensor storing the loss. Used during training only.
        """
        images = self.preprocess_image(batched_inputs)
        if "instances" in batched_inputs[0]:
            gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
        elif "targets" in batched_inputs[0]:
            log_first_n(
                logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10
            )
            gt_instances = [x["targets"].to(self.device) for x in batched_inputs]
        else:
            gt_instances = None

        features = self.backbone(images.tensor)
        features = [features[f] for f in self.in_features]
        box_cls, box_delta = self.head(features)
        anchors = self.anchor_generator(features)
        if self.training:
            gt_classes, gt_anchors_reg_deltas = self.get_ground_truth(anchors, gt_instances)
            losses = self.losses(gt_classes, gt_anchors_reg_deltas, box_cls, box_delta)

            if self.vis_period > 0:
                storage = get_event_storage()
                if storage.iter % self.vis_period == 0:
                    results = self.inference(box_cls, box_delta, anchors, images.image_sizes)
                    self.visualize_training(batched_inputs, results)

            return losses
        else:
            results = self.inference(box_cls, box_delta, anchors, images.image_sizes)
            processed_results = []
            for results_per_image, input_per_image, image_size in zip(
                    results, batched_inputs, images.image_sizes
            ):
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                r = detector_postprocess(results_per_image, height, width)
                processed_results.append({"instances": r})
            return processed_results
Example #14
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (C, H, W) format.
                * instances: Instances

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.
        Returns:
            dict[str: Tensor]:
                mapping from a named loss to a tensor storing the loss. Used during training only.
        """
        images = self.preprocess_image(batched_inputs)
        features = self.backbone(images.tensor)
        features = [features[f] for f in self.in_features]

        anchors = self.anchor_generator(features)
        pred_logits, pred_anchor_deltas = self.head(features)
        # Transpose the Hi*Wi*A dimension to the middle:
        pred_logits = [permute_to_N_HWA_K(x, self.num_classes) for x in pred_logits]
        pred_anchor_deltas = [permute_to_N_HWA_K(x, 4) for x in pred_anchor_deltas]

        if self.training:
            assert "instances" in batched_inputs[0], "Instance annotations are missing in training!"
            gt_instances = [x["instances"].to(self.device) for x in batched_inputs]

            gt_labels, gt_boxes = self.label_anchors(anchors, gt_instances)
            losses = self.losses(anchors, pred_logits, gt_labels, pred_anchor_deltas, gt_boxes)

            if self.vis_period > 0:
                storage = get_event_storage()
                if storage.iter % self.vis_period == 0:
                    results = self.inference(
                        anchors, pred_logits, pred_anchor_deltas, images.image_sizes
                    )
                    self.visualize_training(batched_inputs, results)

            return losses
        else:
            results = self.inference(anchors, pred_logits, pred_anchor_deltas, images.image_sizes)
            processed_results = []
            for results_per_image, input_per_image, image_size in zip(
                    results, batched_inputs, images.image_sizes
            ):
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                r = detector_postprocess(results_per_image, height, width)
                processed_results.append({"instances": r})
            return processed_results
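The `permute_to_N_HWA_K` calls above only change the memory layout: each head output of shape (N, A*K, Hi, Wi) is flattened to (N, Hi*Wi*A, K) so that losses and inference can treat every anchor as one row. A small re-implementation sketch for illustration (not the library function itself):

import torch

def permute_to_N_HWA_K_sketch(t: torch.Tensor, K: int) -> torch.Tensor:
    # (N, A*K, H, W) -> (N, A, K, H, W) -> (N, H, W, A, K) -> (N, H*W*A, K)
    N, _, H, W = t.shape
    return t.view(N, -1, K, H, W).permute(0, 3, 4, 1, 2).reshape(N, -1, K)

x = torch.randn(2, 9 * 80, 25, 38)             # one feature level: A=9 anchors, K=80 classes
print(permute_to_N_HWA_K_sketch(x, 80).shape)  # torch.Size([2, 8550, 80]) = (N, H*W*A, K)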
Example #15
 def convert_outputs(self, batched_inputs, inputs, results):
     results = self._wrapped_model.inference(results, inputs['im_info'])
     processed_results = []
     for results_per_image, input_per_image, image_size in zip(
             results, batched_inputs, inputs['im_info']):
         original_height = input_per_image.get("height", image_size[0])
         original_width = input_per_image.get("width", image_size[1])
         r = detector_postprocess(results_per_image, original_height,
                                  original_width)
         processed_results.append({"instances": r})
     return processed_results
Example #16
    def forward(self, batched_inputs):
        """
        Args:
            Same as in :class:`GeneralizedRCNN.forward`

        Returns:
            list[dict]:
                Each dict is the output for one input image.
                The dict contains one key "proposals" whose value is a
                :class:`Instances` with keys "proposal_boxes" and "objectness_logits".
        """
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [(x - self.pixel_mean) / self.pixel_std for x in images]
        images = ImageList.from_tensors(images,
                                        self.backbone.size_divisibility)
        features = self.backbone(images.tensor)

        if "instances" in batched_inputs[0]:
            gt_instances = [
                x["instances"].to(self.device) for x in batched_inputs
            ]
        elif "targets" in batched_inputs[0]:
            log_first_n(
                logging.WARN,
                "'targets' in the model inputs is now renamed to 'instances'!",
                n=10)
            gt_instances = [
                x["targets"].to(self.device) for x in batched_inputs
            ]
        else:
            gt_instances = None

        masks = {
            key: ImageList.from_tensors([x[key] for x in batched_inputs],
                                        self.backbone.size_divisibility)
            for key in self.masks
        }
        proposals, proposal_losses = self.proposal_generator(
            images, features, gt_instances, **masks)
        # In training, the proposals are not useful at all but we generate them anyway.
        # This makes RPN-only models about 5% slower.
        if self.training:
            return proposal_losses

        processed_results = []
        for results_per_image, input_per_image, image_size in zip(
                proposals, batched_inputs, images.image_sizes):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            r = detector_postprocess(results_per_image, height, width)
            processed_results.append({"proposals": r})

        return processed_results
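Note that the RPN-only path above stores its output under "proposals": `detector_postprocess` rescales `proposal_boxes` the same way it rescales `pred_boxes`. A minimal sketch with synthetic data (detectron2 assumed):

import torch
from detectron2.structures import Boxes, Instances
from detectron2.modeling.postprocessing import detector_postprocess

props = Instances((800, 1216))
props.proposal_boxes = Boxes(torch.tensor([[0.0, 0.0, 608.0, 400.0]]))
props.objectness_logits = torch.tensor([5.0])

r = detector_postprocess(props, 400, 608)  # half the network resolution
print(r.proposal_boxes.tensor)             # boxes scaled by 0.5 -> [[0., 0., 304., 200.]]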
Example #17
 def __call__(self, tracing_adapter_wrapper, batch):
     """
      This function describes how to run the predictor using the exported model. Note
     that `tracing_adapter_wrapper` runs the traced model under the hood and
     behaves exactly the same as the forward function.
     """
     assert len(batch) == 1, "only support single batch"
     width, height = batch[0]["width"], batch[0]["height"]
     inputs = D2RCNNTracingWrapper.generator_trace_inputs(batch)
     results_per_image = tracing_adapter_wrapper(inputs)
     r = detector_postprocess(results_per_image, height, width)
     return [{"instances": r}]
Example #18
    def inference(self, batched_inputs):

        images = self.preprocess_image(batched_inputs)
        features = self.backbone(images.tensor)

        if self.proposal_generator:
            proposals, _ = self.proposal_generator(images, features)
        else:
            raise NotImplementedError

        detector_results, pan_detector_results = self.roi_heads(
            images, features, proposals)
        sem_seg_results, _ = self.sem_seg_head(features)
        pan_seg_results, _ = self.panoptic_head(None, sem_seg_results,
                                                pan_detector_results)

        processed_results = []
        for sem_seg_result, detector_result, pan_seg_result, input_per_image, image_size in zip(
                sem_seg_results, detector_results, pan_seg_results,
                batched_inputs, images.image_sizes):
            processed_result = {}
            height = input_per_image.get("height")
            width = input_per_image.get("width")
            sem_seg_r = sem_seg_postprocess(sem_seg_result, image_size, height,
                                            width)
            detector_r = detector_postprocess(detector_result, height, width)
            processed_result.update({
                "sem_seg": sem_seg_r,
                "instances": detector_r
            })

            if self.combine_on:
                panoptic_r = combine_semantic_and_instance_outputs(
                    detector_r,
                    sem_seg_r.argmax(dim=0),
                    self.combine_overlap_threshold,
                    self.combine_stuff_area_limit,
                    self.combine_instances_confidence_threshold,
                )
            else:
                pan_pred = sem_seg_postprocess(pan_seg_result["pan_logit"],
                                               image_size, height, width)
                del pan_seg_result["pan_logit"]
                pan_seg_result["pan_pred"] = pan_pred.argmax(dim=0)
                panoptic_r = pan_seg_postprocess(pan_seg_result,
                                                 sem_seg_r.argmax(dim=0),
                                                 self.stuff_num_classes,
                                                 self.stuff_area_limit)
            processed_result.update({"panoptic_seg": panoptic_r})

            processed_results.append(processed_result)
        return processed_results
Example #19
    def forward(self, batched_inputs: Tuple[Dict[str, Tensor]]):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:
                * image: Tensor, image in (C, H, W) format.
                * instances: Instances
                Other information that's included in the original dicts,
                such as:
                * "height", "width" (int): the output resolution of the model,
                  used in inference.
                  See :meth:`postprocess` for details.
        Returns:
            in training, dict[str: Tensor]:
                mapping from a named loss to a tensor storing the loss. Used
                during training only.
            in inference, the standard output format, described in
            :doc:`/tutorials/models`.
        """
        #import pickle
        #f=open('test.pkl','rb')
        #batched_inputs = pickle.load(f)
        #f.close()
        num_images = len(batched_inputs)
        #for x in batched_inputs:
        #    print(x["image"].size())
        images = self.preprocess_image(batched_inputs)
        features = self.backbone(images.tensor)
        features = [features[self.backbone_level]]

        anchors_image = self.anchor_generator(features)
        anchors = [copy.deepcopy(anchors_image) for _ in range(num_images)]
        pred_logits, pred_anchor_deltas = self.decoder(
            self.encoder(features[0]))
        # Transpose the Hi*Wi*A dimension to the middle:
        pred_logits = [permute_to_N_HWA_K(pred_logits, self.num_classes)]
        pred_anchor_deltas = [permute_to_N_HWA_K(pred_anchor_deltas, 4)]

        results = self.inference(anchors_image, pred_logits,
                                 pred_anchor_deltas, images.image_sizes)
        if torch.jit.is_scripting():
            return results
        processed_results = []
        for results_per_image, input_per_image, image_size in zip(
                results, batched_inputs, images.image_sizes):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            r = detector_postprocess(results_per_image, height, width)
            processed_results.append({"instances": r})
        return processed_results
Example #20
 def _postprocess(instances, batched_inputs: Tuple[Dict[str, torch.Tensor]],
                  image_sizes):
     """
     Rescale the output instances to the target size.
     """
     # note: private function; subject to changes
     processed_results = []
     for results_per_image, input_per_image, image_size in zip(
             instances, batched_inputs, image_sizes):
         height = input_per_image.get("height", image_size[0])
         width = input_per_image.get("width", image_size[1])
         r = detector_postprocess(results_per_image, height, width)
         processed_results.append({"instances": r})
     return processed_results
Example #21
    def convert_outputs(self, batched_inputs, inputs, results):
        output_names = self.get_output_names()
        assert len(results) == len(output_names)
        results = self._ns.inference(results, inputs['im_info'])

        from detectron2.modeling.postprocessing import detector_postprocess
        processed_results = []
        for results_per_image, input_per_image, image_size in zip(
                results, batched_inputs, inputs['im_info']):
            original_height = input_per_image.get("height", image_size[0])
            original_width = input_per_image.get("width", image_size[1])
            r = detector_postprocess(results_per_image, original_height,
                                     original_width)
            processed_results.append({"instances": r})
        return processed_results
Example #22
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (C, H, W) format.
                * instances: Instances

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model,
                  used in inference. See :meth:`postprocess` for details.
        Returns:
            dict[str: Tensor]:
                mapping from a named loss to a tensor storing the loss. Used
                during training only.
                At inference, the predicted boxes are returned instead.
        """
        images = self.preprocess_image(batched_inputs)
        if "instances" in batched_inputs[0]:
            gt_instances = [
                x['instances'].to(self.device) for x in batched_inputs
            ]
        elif "targets" in batched_inputs[0]:
            log_first_n(logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10)
            gt_instances = [
                x['targets'].to(self.device) for x in batched_inputs
            ]
        else:
            gt_instances = None

        features = self.backbone(images.tensor)
        features = [features[f] for f in self.in_features]
        cls_outs, pts_outs_init, pts_outs_refine = self.head(features)
        center_pts = self.shift_generator(features)

        if self.training:
            return self.losses(center_pts, cls_outs, pts_outs_init, pts_outs_refine, gt_instances)
        else:
            results = self.inference(center_pts, cls_outs, pts_outs_init, pts_outs_refine, images)
            processed_results = []
            for results_per_image, input_per_image, image_size in zip(
                    results, batched_inputs, images.image_sizes):
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                r = detector_postprocess(results_per_image, height, width)
                processed_results.append({"instances": r})
            return processed_results
Example #23
    def inference(self, batched_inputs):
        assert not self.training
        
        images = self.preprocess_image(batched_inputs)
        features = self.backbone(images.tensor)
        features = [features[f] for f in self.head.in_features]

        results = self.head(images, features)

        processed_results = []
        for results_per_image, input_per_image, image_size in zip(
                results, batched_inputs, images.image_sizes):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            r = detector_postprocess(results_per_image, height, width)
            processed_results.append({"instances": r})
        return processed_results
Example #24
    def _test_retinanet_model(self, config_path):
        model = model_zoo.get(config_path, trained=True)
        model.eval()

        fields = {
            "pred_boxes": Boxes,
            "scores": Tensor,
            "pred_classes": Tensor,
        }
        script_model = export_torchscript_with_instances(model, fields)

        img = get_sample_coco_image()
        inputs = [{"image": img}]
        with torch.no_grad():
            instance = model(inputs)[0]["instances"]
            scripted_instance = convert_scripted_instances(script_model(inputs)[0])
            scripted_instance = detector_postprocess(scripted_instance, img.shape[1], img.shape[2])
        assert_instances_allclose(instance, scripted_instance)
Example #25
    def _inference_one_image(self, input):
        """
        Args:
            input (dict): one dataset dict with "image" field being a CHW tensor

        Returns:
            dict: one output dict
        """
        orig_shape = (input["height"], input["width"])
        # For some reason, resize with uint8 slightly increases box AP but decreases densepose AP
        input["image"] = input["image"].to(torch.uint8)
        augmented_inputs, tfms = self._get_augmented_inputs(input)
        # Detect boxes from all augmented versions
        with self._turn_off_roi_heads(
            ["mask_on", "keypoint_on", "densepose_on"]):
            # temporarily disable roi heads
            all_boxes, all_scores, all_classes = self._get_augmented_boxes(
                augmented_inputs, tfms)
        merged_instances = self._merge_detections(all_boxes, all_scores,
                                                  all_classes, orig_shape)

        if self.cfg.MODEL.MASK_ON or self.cfg.MODEL.DENSEPOSE_ON:
            # Use the detected boxes to obtain new fields
            augmented_instances = self._rescale_detected_boxes(
                augmented_inputs, merged_instances, tfms)
            # run forward on the detected boxes
            outputs = self._batch_inference(augmented_inputs,
                                            augmented_instances)
            # Delete now useless variables to avoid being out of memory
            del augmented_inputs, augmented_instances
            # average the predictions
            if self.cfg.MODEL.MASK_ON:
                merged_instances.pred_masks = self._reduce_pred_masks(
                    outputs, tfms)
            if self.cfg.MODEL.DENSEPOSE_ON:
                merged_instances.pred_densepose = self._reduce_pred_densepose(
                    outputs, tfms)
            # postprocess
            merged_instances = detector_postprocess(merged_instances,
                                                    *orig_shape)
            return {"instances": merged_instances}
        else:
            return {"instances": merged_instances}
Example #26
 def postprocess(self, outputs, images, image_ids, to_cpu):
     frames = []
     for instances, image, image_id in zip(outputs, images, image_ids):
         height, width = image.shape[:2]
         instances = detector_postprocess(instances, height, width)
         obj_types = [self.model_meta.thing_classes[pred_class]
                      for pred_class in instances.pred_classes]
         type_valid = [obj_type in TYPE_MAPPING for obj_type in obj_types]
         instances = instances[type_valid]
         features = instances.roi_features.mean(dim=(2, 3))
         features = features / features.norm(dim=1, keepdim=True)
         instances.roi_features = features
         instances.pred_classes = torch.as_tensor([
             TYPE_MAPPING[self.model_meta.thing_classes[pred_class]]
             for pred_class in instances.pred_classes])
         if to_cpu:
             instances = instances.to('cpu')
         frame = Frame(image_id, image, instances)
         frames.append(frame)
     return frames
Example #27
 def __call__(self, batch, inputs, outputs):
     """
      This function describes how to run the predictor using the exported model. Note
     that `tracing_adapter_wrapper` runs the traced model under the hood and
     behaves exactly the same as the forward function.
     """
     assert len(batch) == 1, "only support single batch"
     width, height = batch[0]["width"], batch[0]["height"]
     if self.detector_postprocess_done_in_model:
         image_shape = batch[0]["image"].shape  # chw
         if image_shape[1] != height or image_shape[2] != width:
             raise NotImplementedError(
                 f"Image tensor (shape: {image_shape}) doesn't match the"
                 f" input width ({width}) height ({height}). Since post-process"
                 f" has been done inside the torchscript without width/height"
                 f" information, can't recover the post-processed output to "
                 f"orignail resolution.")
         return [{"instances": outputs}]
     else:
         r = detector_postprocess(outputs, height, width)
         return [{"instances": r}]
Example #28
    def convert_outputs(self, batched_inputs, inputs, results):
        image_sizes = inputs["image_sizes"]
        m_results = [Instances(image_size) for image_size in image_sizes]

        proposal_boxes = results["proposal_boxes"]
        for i in range(len(batched_inputs)):
            indices = (proposal_boxes[:, 0] == i).nonzero(as_tuple=True)
            proposals = proposal_boxes[indices][:, 1:]
            m_results[i].proposal_boxes = Boxes(proposals)
            m_results[i].objectness_logits = torch.linspace(
                10, -10, steps=proposals.size(0), device=proposals.device)

        # postprocess
        processed_results = []
        for results_per_image, input_per_image, image_size in zip(
                m_results, batched_inputs, image_sizes):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            r = detector_postprocess(results_per_image, height, width)
            processed_results.append({"proposals": r})
        return processed_results
Example #29
    def _postprocess(
        self,
        batched_inputs: Sequence[Dict[str, Any]],
        images_sizes: Sequence[Tuple[int, int]],
        detector_results: Sequence[Optional[Instances]],
        unsupervised_results: Sequence[Optional[torch.Tensor]],
    ) -> List[Dict[str, Any]]:
        n_inputs = len(batched_inputs)
        if n_inputs != len(images_sizes):
            raise ValueError(f"length mismatch; {n_inputs=} but {len(images_sizes)=}")
        if n_inputs != len(detector_results):
            raise ValueError(
                f"length mismatch; {n_inputs=} but {len(detector_results)=}"
            )
        if n_inputs != len(unsupervised_results):
            raise ValueError(
                f"length mismatch; {n_inputs=} but {len(unsupervised_results)=}"
            )

        results: List[Dict[str, Any]] = [{} for _ in range(n_inputs)]
        for i in range(len(batched_inputs)):
            image_input = batched_inputs[i]
            image_size = images_sizes[i]
            image_instances = detector_results[i]
            image_unsup_result = unsupervised_results[i]

            h: int = image_input.get("height", image_size[0])
            w: int = image_input.get("width", image_size[1])
            if image_instances is not None:
                r = detector_postprocess(image_instances, h, w)
                results[i]["instances"] = r
            if image_unsup_result is not None:
                u = self.unsupervised_head.postprocess(
                    image_unsup_result, image_size, h, w
                )
                results[i]["unsupervised"] = u
        return results
Example #30
    def postprocess(self, instances, batched_inputs, image_sizes):
        """
            Rescale the output instances to the target size.
        """
        # note: private function; subject to changes
        processed_results = []
        for results_per_image, input_per_image, image_size in zip(
                instances, batched_inputs, image_sizes):
            boxes = results_per_image.pred_boxes.tensor
            scores = results_per_image.scores
            class_idxs = results_per_image.pred_classes

            # Apply per-class nms for each image
            keep = batched_nms(boxes, scores, class_idxs, self.nms_thresh)
            keep = keep[:self.max_detections_per_image]
            results_per_image = results_per_image[keep]

            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            r = detector_postprocess(results_per_image, height, width)
            processed_results.append({"instances": r})

        return processed_results