Code example #1
        def f(batched_inputs, c2_inputs, c2_results):
            # Recover the per-image (height, width) pairs from the Caffe2
            # "im_info" blob, then reassemble the raw Caffe2 output blobs
            # into Instances objects.
            image_sizes = [[int(im[0]), int(im[1])]
                           for im in c2_inputs["im_info"]]
            detector_results = assemble_rcnn_outputs_by_name(
                image_sizes, c2_results, force_mask_on=True)
            sem_seg_results = c2_results["sem_seg"]

            # copied from meta_arch/panoptic_fpn.py ...
            processed_results = []
            for sem_seg_result, detector_result, input_per_image, image_size in zip(
                    sem_seg_results, detector_results, batched_inputs,
                    image_sizes):
                # Fall back to the network input size when the caller did
                # not request a specific output resolution.
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                sem_seg_r = sem_seg_postprocess(sem_seg_result, image_size,
                                                height, width)
                detector_r = detector_postprocess(detector_result, height,
                                                  width)

                processed_results.append({
                    "sem_seg": sem_seg_r,
                    "instances": detector_r
                })

                # combine_on and the combine_* thresholds are closure
                # variables captured from the enclosing scope (they come
                # from the PanopticFPN combine config).
                if combine_on:
                    panoptic_r = combine_semantic_and_instance_outputs(
                        detector_r,
                        sem_seg_r.argmax(dim=0),
                        combine_overlap_threshold,
                        combine_stuff_area_limit,
                        combine_instances_confidence_threshold,
                    )
                    processed_results[-1]["panoptic_seg"] = panoptic_r
            return processed_results
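For context, sem_seg_r above holds per-class scores of shape (num_classes, H, W), while the combination step consumes a single (H, W) label map, which is why the code calls argmax(dim=0). A minimal standalone sketch of that collapse, with made-up sizes:

import torch

# Hypothetical sizes: the semantic head emits one score map per class.
num_classes, H, W = 54, 4, 6
sem_seg_r = torch.randn(num_classes, H, W)

# Collapsing over the class dimension yields the per-pixel label map
# that the semantic/instance combination step expects.
sem_seg_labels = sem_seg_r.argmax(dim=0)
assert sem_seg_labels.shape == (H, W)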
Code example #2
File: dynamic4seg.py  Project: zivzone/cvpods
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
        For now, each item in the list is a dict that contains:
            image: Tensor, image in (C, H, W) format.
            sem_seg: semantic segmentation ground truth
            Other information that's included in the original dicts, such as:
                "height", "width" (int): the output resolution of the model, used in inference.
                    See :meth:`postprocess` for details.
        Returns:
            list[dict]: Each dict is the output for one input image.
                The dict contains one key "sem_seg" whose value is a
                Tensor of the output resolution that represents the
                per-pixel segmentation prediction.
        """
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [self.normalizer(x) for x in images]
        images = ImageList.from_tensors(images,
                                        self.backbone.size_divisibility)

        # step_rate: a float in [0, 1], computed as current_step / total_step.
        # It is the schedule signal used for Scheduled Drop Path.
        step_rate = self.iter * 1.0 / self.max_iter
        self.iter += 1
        features, expt_flops, real_flops = self.backbone(
            images.tensor, step_rate)

        if "sem_seg" in batched_inputs[0]:
            targets = [x["sem_seg"].to(self.device) for x in batched_inputs]
            targets = ImageList.from_tensors(
                targets, self.backbone.size_divisibility, False,
                self.sem_seg_head.ignore_value).tensor
        else:
            targets = None

        results, losses = self.sem_seg_head(features, targets)
        # calculate flops
        real_flops += self.sem_seg_head.flops
        # Detach the FLOPs statistics so they can be logged alongside the
        # losses without contributing gradients to the loss sum.
        real_flops = real_flops.detach().requires_grad_(False)
        expt_flops = expt_flops.detach().requires_grad_(False)
        flops = {'real_flops': real_flops, 'expt_flops': expt_flops}
        # During training, apply the FLOPs budget constraint once the
        # un-update phase has passed, ramping its weight in linearly
        # over the next 2% of total iterations.
        if self.training:
            if self.constrain_on and step_rate >= self.unupdate_rate:
                warm_up_rate = min(1.0,
                                   (step_rate - self.unupdate_rate) / 0.02)
                loss_budget = self.budget_constrint(expt_flops,
                                                    warm_up_rate=warm_up_rate)
                losses.update({'loss_budget': loss_budget})

            losses.update(flops)
            return losses

        processed_results = []
        for result, input_per_image, image_size in zip(results, batched_inputs,
                                                       images.image_sizes):
            height = input_per_image.get("height")
            width = input_per_image.get("width")
            r = sem_seg_postprocess(result, image_size, height, width)
            processed_results.append({"sem_seg": r, "flops": flops})
        return processed_results
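The budget warm-up above ramps loss_budget in linearly over 2% of total iterations once step_rate passes self.unupdate_rate. A minimal sketch of that schedule, assuming a hypothetical unupdate_rate of 0.4 (the real value comes from the model's config):

def warm_up_rate(step_rate, unupdate_rate=0.4):
    # step_rate is current_step / total_step; unupdate_rate = 0.4 is a
    # made-up value for illustration only.
    if step_rate < unupdate_rate:
        return 0.0  # budget constraint not applied yet
    # Linear ramp from 0 to 1 over the next 2% of training.
    return min(1.0, (step_rate - unupdate_rate) / 0.02)

print(warm_up_rate(0.39))  # 0.0 -- before the un-update phase ends
print(warm_up_rate(0.41))  # 0.5 -- halfway through the ramp
print(warm_up_rate(0.43))  # 1.0 -- fully warmed up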