Example #1
    def visualize_training(self, batched_inputs, results):
        """
        A helper used to visualize the final network predictions. It draws up to
        ``max_boxes`` predicted bounding boxes, masks, and points on the original
        image (the ground-truth overlay is currently commented out).

        Args:
            batched_inputs (list): a list that contains input to the model.
            results (List[Instances]): a list with one ``Instances`` element per input image.
        """
        from pointscollection.utils import exVisualizer as Visualizer
        from detectron2.data.detection_utils import convert_image_to_rgb

        assert len(batched_inputs) == len(
            results), "Cannot visualize inputs and results of different sizes"
        # storage = get_event_storage()
        max_boxes = 100

        image_index = 0  # only visualize a single image
        img = batched_inputs[image_index]["image"]
        img = convert_image_to_rgb(img.permute(1, 2, 0), "BGR")
        print(batched_inputs[0]['file_name'], batched_inputs[0]['image_id'])

        # v_gt = Visualizer(img, None)
        # # v_gt = v_gt.overlay_instances(boxes=batched_inputs[image_index]["instances"].gt_boxes)
        # anno_img = v_gt.get_image()
        processed_results = _postprocess(results[image_index], img.shape[0], img.shape[1])
        predicted_boxes = processed_results.pred_boxes.tensor.detach().cpu().numpy()
        predicted_mask = processed_results.pred_masks.detach().cpu().numpy()
        predicted_points = processed_results.pred_points.detach().cpu().numpy()

        v_pred = Visualizer(img, None)
        v_pred = v_pred.overlay_instances(boxes=predicted_boxes[0:max_boxes],
                                          masks=predicted_mask[0:max_boxes],
                                          points=predicted_points[0:max_boxes])
        prop_img = v_pred.get_image()
        vis_img = prop_img  # np.vstack((anno_img, prop_img))
        # vis_img = vis_img.transpose(2, 0, 1)
        # vis_name = f"Top: GT bounding boxes; Bottom: {max_boxes} Highest Scoring Results"
        # plt.imshow(vis_img)
        # plt.show()
        plt.imsave(
            'output/result_show/{:0>12}.png'.format(
                batched_inputs[0]['image_id']), vis_img)
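The save path in Example #1 zero-pads the image id to 12 digits, which matches COCO-style file names; note that `plt.imsave` will not create the `output/result_show/` directory by itself. Example #1 also assumes module-level imports such as `matplotlib.pyplot as plt` and the `_postprocess` helper sketched after Example #2. A small, self-contained check of the path construction (the image id value below is only an illustration, not taken from the original code):

import os

image_id = 397133  # illustrative value only
out_path = 'output/result_show/{:0>12}.png'.format(image_id)
assert out_path == 'output/result_show/000000397133.png'
os.makedirs(os.path.dirname(out_path), exist_ok=True)  # imsave needs the directory to exist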
Example #2
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DetectionTransform` .
                Each item in the list contains the inputs for one image.
            For now, each item in the list is a dict that contains:
                image: Tensor, image in (C, H, W) format.
                instances: Instances
                Other information that's included in the original dicts, such as:
                    "height", "width" (int): the output resolution of the model, used in inference.
                        See :meth:`postprocess` for details.
        Returns:
            losses (dict[str: Tensor]): mapping from a named loss to a tensor
                storing the loss. Used during training only.
        """
        images = self.preprocess_image(batched_inputs)
        if "instances" in batched_inputs[0]:
            gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
        elif "targets" in batched_inputs[0]:
            log_first_n(
                logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10
            )
            gt_instances = [x["targets"].to(self.device) for x in batched_inputs]
        else:
            gt_instances = None

        # print(images.image_sizes)
        # print(images.tensor.size())
        features = self.backbone(images.tensor)
        # for k,v in features.items():
        #     plt.imshow(v[0].squeeze().mean(0).cpu().numpy())
        #     plt.show()
        classify_features = [features[f][0] for f in self.cin_features]
        points_features = [features[f][1] for f in self.pin_features]
        ins_features = [features[f][0] for f in self.ins_features]
        # apply the head
        # print(classify_features[0].size())

        pf_b, pf_c, pf_h, pf_w = points_features[-1].size()
        target_points = points_features[-1].new_zeros(pf_b, 2, pf_h, pf_w, requires_grad=False)
        pred_digits = self.cls_head(classify_features)
        pred_points = self.pc_head(target_points, points_features)

        if self.training:
            # get the ground-truth class labels and regression targets; this labels each anchor
            output_size = classify_features[-1].size()
            gt_clses, gt_belongs, gt_masks, gt_ins = self.get_ground_truth(gt_instances, output_size)
            # compute the loss
            return self.losses(
                gt_clses,
                gt_belongs,
                gt_masks,
                gt_ins,
                pred_digits,
                pred_points,
                ins_features[0]
            )
        else:
            # do inference to get the output
            results = self.inference(pred_digits, pred_points, ins_features[0], images)
            # plt.imshow(np.max(pred_digits[0].cpu().numpy(),0))
            # plt.show()
            # self.visualize_training(batched_inputs,results)
            processed_results = []
            for results_im, input_im, image_size in zip(
                results, batched_inputs, images.image_sizes
            ):
                height = input_im.get("height", image_size[0])
                width = input_im.get("width", image_size[1])
                # post-process the predictions to the requested output resolution
                r = _postprocess(results_im, height, width)
                processed_results.append({"instances": r})
            return processed_results
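Both examples call a module-level `_postprocess` helper that is not shown on this page. Below is a minimal sketch of what it plausibly does, modeled on detectron2's `detector_postprocess`: rescale the per-image `Instances` from the resolution the network operated on to the requested output `height`/`width`. The handling of `pred_points` and the omission of mask resampling are assumptions, not the original implementation.

# Hypothetical sketch of _postprocess, assuming it mirrors
# detectron2.modeling.postprocessing.detector_postprocess.
from detectron2.structures import Instances


def _postprocess(results, output_height, output_width):
    # results.image_size is the (height, width) the network operated on
    scale_x = output_width / results.image_size[1]
    scale_y = output_height / results.image_size[0]
    out = Instances((output_height, output_width), **results.get_fields())

    if out.has("pred_boxes"):
        out.pred_boxes.scale(scale_x, scale_y)  # rescale box corners
        out.pred_boxes.clip(out.image_size)     # clip to the output frame
    if out.has("pred_points"):
        # assumption: points are (..., 2) tensors in (x, y) order
        pts = out.pred_points.clone()
        pts[..., 0] *= scale_x
        pts[..., 1] *= scale_y
        out.pred_points = pts
    # pred_masks would additionally need to be resampled to the output size
    # (e.g. with detectron2's paste_masks_in_image); omitted in this sketch.
    return out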