def f(batched_inputs, c2_inputs, c2_results):
    # NOTE: helpers such as assemble_rcnn_outputs_by_name / sem_seg_postprocess /
    # detector_postprocess / combine_semantic_and_instance_outputs and the
    # combine_* options are not defined here; they are expected to be available
    # from the enclosing scope (or module) at call time.
    image_sizes = [[int(im[0]), int(im[1])] for im in c2_inputs["im_info"]]
    detector_results = assemble_rcnn_outputs_by_name(
        image_sizes, c2_results, force_mask_on=True
    )
    sem_seg_results = c2_results["sem_seg"]

    # copied from meta_arch/panoptic_fpn.py ...
    processed_results = []
    for sem_seg_result, detector_result, input_per_image, image_size in zip(
        sem_seg_results, detector_results, batched_inputs, image_sizes
    ):
        height = input_per_image.get("height", image_size[0])
        width = input_per_image.get("width", image_size[1])
        sem_seg_r = sem_seg_postprocess(sem_seg_result, image_size, height, width)
        detector_r = detector_postprocess(detector_result, height, width)

        processed_results.append({"sem_seg": sem_seg_r, "instances": detector_r})

        if combine_on:
            panoptic_r = combine_semantic_and_instance_outputs(
                detector_r,
                sem_seg_r.argmax(dim=0),
                combine_overlap_threshold,
                combine_stuff_area_limit,
                combine_instances_confidence_threshold,
            )
            processed_results[-1]["panoptic_seg"] = panoptic_r
    return processed_results
def forward(self, batched_inputs):
    """
    Args:
        batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
            Each item in the list contains the inputs for one image.
            For now, each item in the list is a dict that contains:

                image: Tensor, image in (C, H, W) format.
                sem_seg: semantic segmentation ground truth.
                Other information that's included in the original dicts, such as:
                    "height", "width" (int): the output resolution of the model,
                        used in inference. See :meth:`postprocess` for details.

    Returns:
        list[dict]: Each dict is the output for one input image.
            The dict contains one key "sem_seg" whose value is a Tensor of
            the output resolution that represents the per-pixel segmentation
            prediction.
    """
    images = [x["image"].to(self.device) for x in batched_inputs]
    images = [self.normalizer(x) for x in images]
    images = ImageList.from_tensors(images, self.backbone.size_divisibility)

    # step_rate: a float computed as current_step / total_step; it is used for
    # Scheduled Drop Path and to gate the budget constraint below.
    step_rate = self.iter * 1.0 / self.max_iter
    self.iter += 1
    features, expt_flops, real_flops = self.backbone(images.tensor, step_rate)

    if "sem_seg" in batched_inputs[0]:
        targets = [x["sem_seg"].to(self.device) for x in batched_inputs]
        targets = ImageList.from_tensors(
            targets, self.backbone.size_divisibility, False, self.sem_seg_head.ignore_value
        ).tensor
    else:
        targets = None
    results, losses = self.sem_seg_head(features, targets)

    # calculate flops: add the head's flops to the backbone's flops
    real_flops += self.sem_seg_head.flops
    # detach the flops statistics so they carry no gradient into the loss sum
    real_flops = real_flops.detach().requires_grad_(False)
    expt_flops = expt_flops.detach().requires_grad_(False)
    flops = {"real_flops": real_flops, "expt_flops": expt_flops}

    # use budget constraint for training
    if self.training:
        if self.constrain_on and step_rate >= self.unupdate_rate:
            warm_up_rate = min(1.0, (step_rate - self.unupdate_rate) / 0.02)
            loss_budget = self.budget_constrint(expt_flops, warm_up_rate=warm_up_rate)
            losses.update({"loss_budget": loss_budget})
        losses.update(flops)
        return losses

    processed_results = []
    for result, input_per_image, image_size in zip(
        results, batched_inputs, images.image_sizes
    ):
        height = input_per_image.get("height")
        width = input_per_image.get("width")
        r = sem_seg_postprocess(result, image_size, height, width)
        processed_results.append({"sem_seg": r, "flops": flops})
    return processed_results
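

# Usage sketch (illustrative only, not part of the model): a minimal example of
# the `batched_inputs` contract documented in `forward` above. The tensor shapes,
# dtypes, and the commented-out `model` call are assumptions for demonstration;
# in the real pipeline these dicts are produced by the dataset mapper/dataloader.
if __name__ == "__main__":
    import torch

    example_inputs = [
        {
            # image in (C, H, W) format
            "image": torch.randint(0, 256, (3, 512, 1024), dtype=torch.uint8),
            # per-pixel ground-truth labels, only needed for training
            "sem_seg": torch.zeros((512, 1024), dtype=torch.long),
            # desired output resolution, used by sem_seg_postprocess at inference
            "height": 1024,
            "width": 2048,
        }
    ]
    # With a constructed model (hypothetical here):
    #   losses = model(example_inputs)   # training mode: dict of losses + flops stats
    #   results = model(example_inputs)  # eval mode: [{"sem_seg": ..., "flops": ...}]
    print({k: tuple(v.shape) if torch.is_tensor(v) else v for k, v in example_inputs[0].items()})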