def forward(self, batched_inputs):
    """
    Args:
        batched_inputs (list[dict]): batched outputs of :class:`DatasetMapper`.
            Each item in the list contains the inputs for one image.
            For now, each item in the list is a dict that contains:

            * image: Tensor, image in (C, H, W) format.
            * instances (optional): groundtruth :class:`Instances`
            * proposals (optional): :class:`Instances`, precomputed proposals.

            Other information that's included in the original dicts, such as:

            * "height", "width" (int): the output resolution of the model, used in inference.
              See :meth:`postprocess` for details.

    Returns:
        list[dict]: Each dict is the output for one input image.
            The dict contains one key "instances" whose value is an :class:`Instances`.
            The :class:`Instances` object has the following keys:
            "pred_boxes", "pred_classes", "scores", "pred_masks", "pred_keypoints"
    """
    if not self.training:
        return self.inference(batched_inputs)

    images = self.preprocess_image(batched_inputs)
    if "instances" in batched_inputs[0]:
        gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
    elif "targets" in batched_inputs[0]:
        log_first_n(
            logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10
        )
        gt_instances = [x["targets"].to(self.device) for x in batched_inputs]
    else:
        gt_instances = None

    features = self.backbone(images.tensor)

    if self.proposal_generator:
        proposals, proposal_losses = self.proposal_generator(images, features, gt_instances)
    else:
        assert "proposals" in batched_inputs[0]
        proposals = [x["proposals"].to(self.device) for x in batched_inputs]
        proposal_losses = {}

    _, detector_losses = self.roi_heads(images, features, proposals, gt_instances)
    if self.vis_period > 0:
        storage = get_event_storage()
        if storage.iter % self.vis_period == 0:
            self.visualize_training(batched_inputs, proposals)

    losses = {}
    losses.update(detector_losses)
    losses.update(proposal_losses)
    return losses
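# Usage sketch for the input/output contract documented above (not part of the
# model code). Assumes detectron2 is installed; the default config builds a
# GeneralizedRCNN with random weights, which is enough to exercise the format.
import torch
from detectron2.config import get_cfg
from detectron2.modeling import build_model

cfg = get_cfg()
cfg.MODEL.DEVICE = "cpu"  # keep the sketch runnable without a GPU
model = build_model(cfg)  # a GeneralizedRCNN under the default config
model.eval()              # switches forward() to the inference path
with torch.no_grad():
    # one dict per image; "height"/"width" request the output resolution
    inputs = [{"image": torch.zeros(3, 480, 640), "height": 480, "width": 640}]
    outputs = model(inputs)               # list[dict], one entry per image
    instances = outputs[0]["instances"]   # pred_boxes, scores, pred_classes, ...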
def __getattr__(self, key):
    if key in self._RENAMED:
        log_first_n(
            logging.WARNING,
            "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]),
            n=10,
        )
        return getattr(self, self._RENAMED[key])
    raise AttributeError(
        "Attribute '{}' does not exist in the metadata of '{}'. Available keys are {}.".format(
            key, self.name, str(self.__dict__.keys())
        )
    )
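# Self-contained sketch of the rename fallback above. _ToyMetadata is a toy
# stand-in for the real Metadata class; its _RENAMED entry mirrors a real one
# ("class_names" was renamed to "thing_classes").
class _ToyMetadata:
    _RENAMED = {"class_names": "thing_classes"}
    name = "toy_dataset"

    def __getattr__(self, key):
        # __getattr__ only runs when normal attribute lookup fails, so
        # attributes stored under the new name are found directly
        if key in self._RENAMED:
            return getattr(self, self._RENAMED[key])
        raise AttributeError("no metadata key {!r} in {!r}".format(key, self.name))


toy_meta = _ToyMetadata()
toy_meta.thing_classes = ["cat", "dog"]
assert toy_meta.class_names == ["cat", "dog"]  # old key transparently resolves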
def forward(self, batched_inputs):
    """
    Args:
        Same as in :class:`GeneralizedRCNN.forward`

    Returns:
        list[dict]: Each dict is the output for one input image.
            The dict contains one key "proposals" whose value is a
            :class:`Instances` with keys "proposal_boxes" and "objectness_logits".
    """
    images = [x["image"].to(self.device) for x in batched_inputs]
    images = [self.normalizer(x) for x in images]
    images = ImageList.from_tensors(images, self.backbone.size_divisibility)
    features = self.backbone(images.tensor)

    if "instances" in batched_inputs[0]:
        gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
    elif "targets" in batched_inputs[0]:
        log_first_n(
            logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10
        )
        gt_instances = [x["targets"].to(self.device) for x in batched_inputs]
    else:
        gt_instances = None

    proposals, proposal_losses = self.proposal_generator(images, features, gt_instances)

    # In training, the proposals are not useful at all, but we generate them anyway.
    # This makes RPN-only models about 5% slower.
    if self.training:
        return proposal_losses

    processed_results = []
    for results_per_image, input_per_image, image_size in zip(
        proposals, batched_inputs, images.image_sizes
    ):
        height = input_per_image.get("height", image_size[0])
        width = input_per_image.get("width", image_size[1])
        r = detector_postprocess(results_per_image, height, width)
        processed_results.append({"proposals": r})
    return processed_results
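# A small helper sketch (hypothetical, not part of detectron2) showing how to
# consume the "proposals" output documented above. In detectron2's RPN the
# proposals come sorted by objectness_logits, so slicing keeps the best ones.
def top_proposals(outputs, k=100):
    kept = []
    for output in outputs:
        props = output["proposals"]  # an Instances object
        # props.proposal_boxes is a Boxes of shape (N, 4);
        # props.objectness_logits is a (N,) tensor of unnormalized scores
        kept.append(props[:k])       # Instances supports slicing
    return kept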
def __setattr__(self, key, val):
    if key in self._RENAMED:
        log_first_n(
            logging.WARNING,
            "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]),
            n=10,
        )
        setattr(self, self._RENAMED[key], val)

    # Ensure that metadata of the same name stays consistent
    try:
        oldval = getattr(self, key)
        assert oldval == val, (
            "Attribute '{}' in the metadata of '{}' cannot be set "
            "to a different value!\n{} != {}".format(key, self.name, oldval, val)
        )
    except AttributeError:
        super().__setattr__(key, val)
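# Sketch of the write-once rule enforced above, using the real MetadataCatalog;
# the dataset name "my_toy_dataset" is a placeholder.
from detectron2.data import MetadataCatalog

catalog_meta = MetadataCatalog.get("my_toy_dataset")
catalog_meta.thing_classes = ["cat", "dog"]
catalog_meta.thing_classes = ["cat", "dog"]  # same value: accepted silently
try:
    catalog_meta.thing_classes = ["bird"]    # different value: rejected
except AssertionError as e:
    print("metadata is write-once:", e)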
def print_instances_class_histogram(dataset_dicts, class_names):
    """
    Args:
        dataset_dicts (list[dict]): list of dataset dicts.
        class_names (list[str]): list of class names (zero-indexed).
    """
    num_classes = len(class_names)
    hist_bins = np.arange(num_classes + 1)
    histogram = np.zeros((num_classes,), dtype=int)  # np.int is deprecated; use the builtin
    for entry in dataset_dicts:
        annos = entry["annotations"]
        classes = [x["category_id"] for x in annos if not x.get("iscrowd", 0)]
        histogram += np.histogram(classes, bins=hist_bins)[0]

    N_COLS = min(6, len(class_names) * 2)

    def short_name(x):
        # make long class names shorter. useful for lvis
        if len(x) > 13:
            return x[:11] + ".."
        return x

    data = list(
        itertools.chain(*[[short_name(class_names[i]), int(v)] for i, v in enumerate(histogram)])
    )
    total_num_instances = sum(data[1::2])
    data.extend([None] * (N_COLS - (len(data) % N_COLS)))
    if num_classes > 1:
        data.extend(["total", total_num_instances])
    data = itertools.zip_longest(*[data[i::N_COLS] for i in range(N_COLS)])
    table = tabulate(
        data,
        headers=["category", "#instances"] * (N_COLS // 2),
        tablefmt="pipe",
        numalign="left",
        stralign="center",
    )
    log_first_n(
        logging.INFO,
        "Distribution of instances among all {} categories:\n".format(num_classes)
        + colored(table, "cyan"),
        key="message",
    )
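# Standalone sketch of the binning step above: with bins=np.arange(num_classes + 1),
# np.histogram puts each integer category_id into its own bin, so the result is a
# per-class instance count.
import numpy as np

toy_class_names = ["cat", "dog", "bird"]         # toy data
toy_classes = [0, 0, 2, 1, 0]                    # category_id of each annotation
toy_bins = np.arange(len(toy_class_names) + 1)   # [0, 1, 2, 3]
counts = np.histogram(toy_classes, bins=toy_bins)[0]
print(dict(zip(toy_class_names, counts.tolist())))  # {'cat': 3, 'dog': 1, 'bird': 1}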
def forward(self, batched_inputs):
    """
    Args:
        batched_inputs: a list, batched outputs of :class:`DetectionTransform`.
            Each item in the list contains the inputs for one image.
            For now, each item in the list is a dict that contains:
                image: Tensor, image in (C, H, W) format.
                instances: Instances
            Other information that's included in the original dicts, such as:
                "height", "width" (int): the output resolution of the model,
                used in inference. See :meth:`postprocess` for details.

    Returns:
        In training, dict[str: Tensor]: mapping from a named loss to a tensor
        storing the loss. In inference, list[dict]: one dict per image, each
        with an "instances" key holding the post-processed results.
    """
    images = self.preprocess_image(batched_inputs)
    if "instances" in batched_inputs[0]:
        gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
    elif "targets" in batched_inputs[0]:
        log_first_n(
            logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10
        )
        gt_instances = [x["targets"].to(self.device) for x in batched_inputs]
    else:
        gt_instances = None

    features = self.backbone(images.tensor)
    features = [features[f] for f in self.in_features]
    # apply the TensorMask head
    pred_logits, pred_deltas, pred_masks = self.head(features)
    # generate anchors from the features; anchors depend only on the feature
    # map sizes, so they are the same for every image in the batch
    anchors, unit_lengths, indexes = self.anchor_generator(features)
    if self.training:
        # get ground truths for class labels and box targets; this labels each anchor
        gt_class_info, gt_delta_info, gt_mask_info, num_fg = self.get_ground_truth(
            anchors, unit_lengths, indexes, gt_instances
        )
        # compute the loss
        return self.losses(
            gt_class_info,
            gt_delta_info,
            gt_mask_info,
            num_fg,
            pred_logits,
            pred_deltas,
            pred_masks,
        )
    else:
        # do inference to get the output
        results = self.inference(pred_logits, pred_deltas, pred_masks, anchors, indexes, images)
        processed_results = []
        for results_im, input_im, image_size in zip(
            results, batched_inputs, images.image_sizes
        ):
            height = input_im.get("height", image_size[0])
            width = input_im.get("width", image_size[1])
            # post-process the results to the requested output resolution
            result_box, result_mask = results_im
            r = _postprocess(result_box, result_mask, height, width)
            processed_results.append({"instances": r})
        return processed_results
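# Note on the post-processing loop above: "height"/"width" let the caller request
# results at the original image resolution; when absent, the in-network image size
# (after resizing and padding) is used. A minimal illustration of that fallback:
input_im = {"image": None}  # no "height"/"width" requested by the caller
image_size = (800, 1216)    # size of the image as seen by the network
height = input_im.get("height", image_size[0])  # -> 800
width = input_im.get("width", image_size[1])    # -> 1216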
def forward(self, batched_inputs):
    """
    Args:
        batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
            Each item in the list contains the inputs for one image.
            For now, each item in the list is a dict that contains:

            * image: Tensor, image in (C, H, W) format.
            * instances: Instances

            Other information that's included in the original dicts, such as:

            * "height", "width" (int): the output resolution of the model, used in inference.
              See :meth:`postprocess` for details.

    Returns:
        In training, dict[str: Tensor]: mapping from a named loss to a tensor storing
        the loss. In inference, list[dict]: one dict per image, each with an "instances"
        key holding the post-processed detections.
    """
    images = self.preprocess_image(batched_inputs)
    if "instances" in batched_inputs[0]:
        gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
    elif "targets" in batched_inputs[0]:
        log_first_n(
            logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10
        )
        gt_instances = [x["targets"].to(self.device) for x in batched_inputs]
    else:
        gt_instances = None

    features = self.backbone(images.tensor)
    features = [features[f] for f in self.in_features]
    box_cls, box_delta = self.head(features)
    anchors = self.anchor_generator(features)

    if self.training:
        gt_classes, gt_anchors_reg_deltas = self.get_ground_truth(anchors, gt_instances)
        losses = self.losses(gt_classes, gt_anchors_reg_deltas, box_cls, box_delta)

        if self.vis_period > 0:
            storage = get_event_storage()
            if storage.iter % self.vis_period == 0:
                results = self.inference(box_cls, box_delta, anchors, images.image_sizes)
                self.visualize_training(batched_inputs, results)

        return losses
    else:
        results = self.inference(box_cls, box_delta, anchors, images.image_sizes)
        processed_results = []
        for results_per_image, input_per_image, image_size in zip(
            results, batched_inputs, images.image_sizes
        ):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            r = detector_postprocess(results_per_image, height, width)
            processed_results.append({"instances": r})
        return processed_results
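# End-to-end sketch of the train/eval contract above, built from a RetinaNet
# config in the model zoo (random weights; the loss values are meaningless but
# the return types are real). Assumes detectron2 is installed.
import torch
from detectron2 import model_zoo
from detectron2.modeling import build_model
from detectron2.structures import Boxes, Instances

retinanet_cfg = model_zoo.get_config("COCO-Detection/retinanet_R_50_FPN_1x.yaml")
retinanet_cfg.MODEL.DEVICE = "cpu"
retinanet = build_model(retinanet_cfg)

gt = Instances((480, 640))
gt.gt_boxes = Boxes(torch.tensor([[10.0, 10.0, 100.0, 100.0]]))
gt.gt_classes = torch.tensor([0])
rn_inputs = [{"image": torch.zeros(3, 480, 640), "instances": gt}]

retinanet.train()
rn_losses = retinanet(rn_inputs)       # dict, e.g. {"loss_cls": ..., "loss_box_reg": ...}
retinanet.eval()
with torch.no_grad():
    rn_outputs = retinanet(rn_inputs)  # [{"instances": Instances(...)}]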