def forward(self, image_list, feature_maps):
    # type: (ImageList, List[Tensor])
    if torchvision._is_tracing():
        # For onnx export, Python int can only be traced as Constant
        from torch.onnx import operators
        grid_sizes = list([
            operators.shape_as_tensor(feature_map)[-2:]
            for feature_map in feature_maps
        ])
        image_size = operators.shape_as_tensor(image_list.tensors)[-2:]
        strides = [image_size / g for g in grid_sizes]
    else:
        grid_sizes = list(
            [feature_map.shape[-2:] for feature_map in feature_maps])
        image_size = image_list.tensors.shape[-2:]
        strides = [[int(image_size[0] / g[0]), int(image_size[1] / g[1])]
                   for g in grid_sizes]

    dtype, device = feature_maps[0].dtype, feature_maps[0].device
    self.set_cell_anchors(dtype, device)
    anchors_over_all_feature_maps = self.cached_grid_anchors(grid_sizes, strides)
    anchors = torch.jit.annotate(List[List[torch.Tensor]], [])
    for i, (image_height, image_width) in enumerate(image_list.image_sizes):
        anchors_in_image = []
        for anchors_per_feature_map in anchors_over_all_feature_maps:
            anchors_in_image.append(anchors_per_feature_map)
        anchors.append(anchors_in_image)
    anchors = [torch.cat(anchors_per_image) for anchors_per_image in anchors]
    # Clear the cache in case that memory leaks.
    self._cache.clear()
    return anchors
def decode_batch_loop_helper(bboxes, probs, criteria, max_output):
    # Needed for dynamic shapes during ONNX export tracing.
    from torch.onnx import operators

    bboxes_out = torch.jit.annotate(List[Tensor], [])
    scores_out = torch.jit.annotate(List[Tensor], [])
    labels_out = torch.jit.annotate(List[Tensor], [])
    for i in range(probs.size(1)):
        # skip background
        if i != 0:
            scores_per_label = probs[:, i]
            mask = scores_per_label > 0.05
            bboxes_masked, scores_masked = bboxes[mask, :], scores_per_label[mask]
            print('decode single iter scores masked:', scores_masked,
                  scores_masked.shape)
            num_selected = operators.shape_as_tensor(scores_masked)[0].unsqueeze(0)
            k = torch.min(torch.cat((max_output, num_selected), 0))
            _, sorted_idx = scores_masked.topk(k, dim=0)
            bboxes_masked = bboxes_masked[sorted_idx]
            scores_masked = scores_masked[sorted_idx]
            out_idx = torch.ops.roi_ops.nms(bboxes_masked, scores_masked, criteria)
            bboxes_out.append(bboxes_masked[out_idx])
            scores_out.append(scores_masked[out_idx])
            labels_out.append(torch.full(out_idx.shape, i, dtype=torch.long))
            print('decode single iter output:', scores_out[-1], labels_out[-1])

    # return top max_output
    bboxes_out = torch.cat(bboxes_out, dim=0)
    labels_out = torch.cat(labels_out, dim=0)
    scores_out = torch.cat(scores_out, dim=0)
    return bboxes_out, labels_out, scores_out
def topk(x, k, dim=None, **kwargs):
    from torch.onnx import operators, is_in_onnx_export

    if dim is None:
        dim = x.dim() - 1
    if is_in_onnx_export():
        n = operators.shape_as_tensor(x)[dim].unsqueeze(0)
        if not isinstance(k, torch.Tensor):
            k = torch.tensor([k], dtype=torch.long)
        # Workaround for ONNXRuntime: convert values to int to get minimum.
        n = torch.min(torch.cat((k, n), dim=0).int()).long()
        # ONNX OpSet 10 does not support non-floating point input for TopK.
        original_dtype = x.dtype
        require_cast = original_dtype not in {
            torch.float16, torch.float32, torch.float64
        }
        if require_cast:
            x = x.to(torch.float32)
        values, keep = torch.topk(x, n, dim=dim, **kwargs)
        if require_cast:
            values = values.to(original_dtype)
    else:
        values, keep = torch.topk(x, min(int(k), x.shape[dim]), dim=dim, **kwargs)
    return values, keep
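# Hedged usage sketch, not taken from the original sources: it only illustrates
# how the `topk` wrapper above clamps the requested k against the dynamic size
# of the input, so the same call works in eager mode and under ONNX export
# tracing. The tensor sizes below are illustrative assumptions.
def _topk_clamp_example():
    import torch
    scores = torch.rand(40)
    # k exceeds the number of elements; the wrapper clamps it to 40.
    values, keep = topk(scores, k=100, dim=0, largest=True, sorted=True)
    assert values.shape[0] == 40
    return values, keep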
def forward(self, feats, rois, roi_scale_factor=None):
    from torch.onnx import operators

    if len(feats) == 1:
        return self.roi_layers[0](feats[0], rois)

    num_levels = len(feats)
    target_lvls = self.map_roi_levels(rois, num_levels)
    if roi_scale_factor is not None:
        rois = self.roi_rescale(rois, roi_scale_factor)

    indices = []
    roi_feats = []
    for level, (feat, extractor) in enumerate(zip(feats, self.roi_layers)):
        # Explicit casting to int is required for ONNXRuntime.
        level_indices = torch.nonzero((target_lvls == level).int()).view(-1)
        level_rois = rois[level_indices]
        indices.append(level_indices)
        level_feats = extractor(feat, level_rois)
        roi_feats.append(level_feats)

    # Concatenate roi features from different pyramid levels
    # and rearrange them to match original ROIs order.
    indices = torch.cat(indices, dim=0)
    k = operators.shape_as_tensor(indices)
    _, indices = topk(indices, k, dim=0, largest=False)
    roi_feats = torch.cat(roi_feats, dim=0)[indices]
    return roi_feats
def _resize_image_and_masks_onnx(image, self_min_size, self_max_size, target):
    # type: (Tensor, float, float, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]
    from torch.onnx import operators
    im_shape = operators.shape_as_tensor(image)[-2:]
    min_size = torch.min(im_shape).to(dtype=torch.float32)
    max_size = torch.max(im_shape).to(dtype=torch.float32)
    scale_factor = torch.min(self_min_size / min_size, self_max_size / max_size)

    image = torch.nn.functional.interpolate(
        image[None],
        scale_factor=scale_factor,
        mode='bilinear',
        recompute_scale_factor=True,
        align_corners=False)[0]

    if target is None:
        return image, target

    if "masks" in target:
        mask = target["masks"]
        mask = F.interpolate(
            mask[:, None].float(),
            scale_factor=scale_factor,
            recompute_scale_factor=True)[:, 0].byte()
        target["masks"] = mask
    return image, target
def forward_export(self, imgs):
    from torch.onnx import operators
    img_shape = operators.shape_as_tensor(imgs[0])
    imgs_per_gpu = int(imgs[0].size(0))
    assert imgs_per_gpu == 1
    self.img_metas[0][0]['img_shape'] = img_shape[2:4]
    return self.simple_test(imgs[0], self.img_metas[0], postprocess=False)
def _onnx_get_num_anchors_and_pre_nms_top_n(ob, orig_pre_nms_top_n):
    # type: (Tensor, int) -> Tuple[int, int]
    from torch.onnx import operators
    num_anchors = operators.shape_as_tensor(ob)[1].unsqueeze(0)
    pre_nms_top_n = torch.min(
        torch.cat((torch.tensor([orig_pre_nms_top_n], dtype=num_anchors.dtype),
                   num_anchors), 0))
    return num_anchors, pre_nms_top_n
def decode_batch_with_multi_label_nms_trace(self,
                                            bboxes_in,
                                            scores_in,
                                            criteria=0.45,
                                            max_output=200,
                                            device=0):
    # Needed for dynamic shapes during ONNX export tracing.
    from torch.onnx import operators

    bboxes, probs = self.scale_back_batch(bboxes_in, scores_in, device)
    torch.ops.load_library(
        os.path.join(os.path.dirname(__file__), 'lib',
                     'custom_ops.cpython-37m-x86_64-linux-gnu.so'))
    # bboxes shape [batch, box num, 4]
    # probs shape [batch, box num, label num]
    probs = probs.permute(0, 2, 1)
    # probs shape [batch, label num, box num]
    # remove background
    probs = probs[:, 1:, :]
    selected_indices = torch.ops.roi_ops.multi_label_nms(
        bboxes, probs,
        torch.full((1, ), max_output, dtype=torch.long),
        torch.full((1, ), criteria, dtype=torch.float),
        torch.full((1, ), 0.05, dtype=torch.float))
    labels = selected_indices[:, 1]
    box_indices = selected_indices[:, 2]
    scores_out = probs.reshape(-1)[labels * operators.shape_as_tensor(probs)[2]
                                   + box_indices]

    # return top max_output
    num_selected = operators.shape_as_tensor(scores_out)[0].unsqueeze(0)
    k = torch.min(
        torch.cat((torch.tensor([max_output], dtype=torch.long),
                   num_selected), 0))
    _, max_ids = scores_out.topk(k, dim=0)
    bboxes = bboxes.squeeze(0)[box_indices.index_select(0, max_ids), :].unsqueeze(0)
    labels = labels.index_select(0, max_ids).unsqueeze(0) + 1
    scores_out = scores_out.index_select(0, max_ids).unsqueeze(0)
    return bboxes, labels, scores_out
def _onnx_get_num_anchors_and_pre_nms_top_n(ob: Tensor,
                                            orig_pre_nms_top_n: int) -> Tuple[int, int]:
    from torch.onnx import operators
    num_anchors = operators.shape_as_tensor(ob)[1].unsqueeze(0)
    pre_nms_top_n = torch.min(
        torch.cat((torch.tensor([orig_pre_nms_top_n], dtype=num_anchors.dtype),
                   num_anchors), 0))
    # for mypy we cast at runtime
    return cast(int, num_anchors), cast(int, pre_nms_top_n)
def _onnx_get_num_anchors_and_pre_nms_top_n(ob, orig_pre_nms_top_n):
    from torch.onnx import operators
    num_anchors = operators.shape_as_tensor(ob)[1].unsqueeze(0)
    # TODO: remove cast to IntTensor/num_anchors.dtype when
    # ONNX Runtime version is updated with ReduceMin int64 support
    pre_nms_top_n = torch.min(
        torch.cat((torch.tensor([orig_pre_nms_top_n], dtype=num_anchors.dtype),
                   num_anchors), 0).to(torch.int32)).to(num_anchors.dtype)
    return num_anchors, pre_nms_top_n
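# Hedged sketch, an assumption rather than code from any of the repositories
# above: the `_onnx_get_num_anchors_and_pre_nms_top_n` variants all rely on the
# same idiom, shown in isolation here -- clamp a Python constant against a
# traced dimension by concatenating both as 1-element tensors and reducing with
# torch.min, so no shape gets baked into the exported graph as a constant.
def _clamp_to_traced_dim_example(ob, limit):
    import torch
    from torch.onnx import operators
    # Size of dim 1, kept as a 1-element tensor so it stays symbolic when traced.
    dynamic_size = operators.shape_as_tensor(ob)[1].unsqueeze(0)
    static_limit = torch.tensor([limit], dtype=dynamic_size.dtype)
    return torch.min(torch.cat((static_limit, dynamic_size), 0))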
def detections_to_keep_onnx(self, scores):
    from torch.onnx import operators
    number_of_detections = operators.shape_as_tensor(scores)
    number_to_keep = torch.min(
        torch.cat((torch.tensor([self.detections_per_img], dtype=torch.long),
                   number_of_detections), 0))
    _, keep = torch.topk(scores, number_to_keep, dim=0, sorted=True)
    return keep
def forward_export(self, imgs):
    from torch.onnx.operators import shape_as_tensor
    assert self.img_metas, \
        'Error: forward_export should be called inside forward_export_context'
    img_shape = shape_as_tensor(imgs[0])
    imgs_per_gpu = int(imgs[0].size(0))
    assert imgs_per_gpu == 1
    assert len(self.img_metas[0]) == imgs_per_gpu, \
        f'self.img_metas={self.img_metas}'
    self.img_metas[0][0]['img_shape'] = img_shape[2:4]
    return self.simple_test(imgs[0], self.img_metas[0], postprocess=False)
def _resize_image_onnx(image, self_min_size, self_max_size):
    # type: (Tensor, float, float) -> Tensor
    from torch.onnx import operators
    im_shape = operators.shape_as_tensor(image)[-2:]
    min_size = torch.min(im_shape).to(dtype=torch.float32)
    max_size = torch.max(im_shape).to(dtype=torch.float32)
    scale_factor = torch.min(self_min_size / min_size, self_max_size / max_size)

    image = torch.nn.functional.interpolate(
        image[None],
        scale_factor=scale_factor,
        mode="bilinear",
        recompute_scale_factor=True,
        align_corners=False)[0]
    return image
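# Hedged sketch with illustrative numbers, not from the original repository:
# the scale-factor rule shared by `_resize_image_onnx` and
# `_resize_image_and_masks_onnx` picks the largest scale that brings the short
# side toward self_min_size without letting the long side exceed self_max_size,
# computed entirely with tensor ops so the exported resize stays dynamic.
def _scale_factor_example():
    import torch
    im_shape = torch.tensor([480.0, 640.0])          # hypothetical H, W
    self_min_size = torch.tensor(800.0)
    self_max_size = torch.tensor(1333.0)
    scale_factor = torch.min(self_min_size / torch.min(im_shape),
                             self_max_size / torch.max(im_shape))
    # 800 / 480 ~= 1.667 vs 1333 / 640 ~= 2.083, so the short-side ratio wins.
    return scale_factor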
def anchor_generator_forward_patch(self, image_list_tensors,
                                   image_list_image_sizes, feature_maps):
    if torchvision._is_tracing():
        from torch.onnx import operators
        grid_sizes = list([
            operators.shape_as_tensor(feature_map)[-2:]
            for feature_map in feature_maps
        ])
        image_size = operators.shape_as_tensor(image_list_tensors)[-2:]
        strides = [image_size / g for g in grid_sizes]
    else:
        grid_sizes = list(
            [feature_map.shape[-2:] for feature_map in feature_maps])
        image_size = image_list_tensors.shape[-2:]
        # int() here emits "TracerWarning: Converting a tensor to a Python
        # integer" when traced, which is why the tracing branch above avoids it.
        strides = [[int(image_size[0] / g[0]), int(image_size[1] / g[1])]
                   for g in grid_sizes]

    dtype, device = feature_maps[0].dtype, feature_maps[0].device
    self.set_cell_anchors(dtype, device)
    # return self.cell_anchors

    # Skip the cache: during export only one batch is run.
    # anchors_over_all_feature_maps = self.cached_grid_anchors(grid_sizes, strides)
    anchors_over_all_feature_maps = self.grid_anchors(grid_sizes, strides)
    # return anchors_over_all_feature_maps

    anchors = torch.jit.annotate(List[List[torch.Tensor]], [])
    # Original loop: for i, (image_height, image_width) in enumerate(image_list.image_sizes):
    # Looping over the per-image sizes fixes the number of images at trace time,
    # so N cannot be exported as a dynamic dimension.
    for hw in image_list_image_sizes:
        anchors_in_image = []
        for anchors_per_feature_map in anchors_over_all_feature_maps:
            anchors_in_image.append(anchors_per_feature_map)
        anchors.append(anchors_in_image)
    anchors = [torch.cat(anchors_per_image) for anchors_per_image in anchors]
    return anchors
def get_shape_from_feature_map(x):
    """Get spatial resolution of input feature map considering exporting to
    onnx mode.

    Args:
        x (torch.Tensor): Input tensor, shape (N, C, H, W)

    Returns:
        torch.Tensor: Spatial resolution (width, height), shape (1, 1, 2)
    """
    from torch.onnx.operators import shape_as_tensor

    if torch.onnx.is_in_onnx_export():
        img_shape = shape_as_tensor(x)[2:].flip(0).view(1, 1, 2).to(
            x.device).float()
    else:
        img_shape = torch.tensor(x.shape[2:]).flip(0).view(1, 1, 2).to(
            x.device).float()
    return img_shape
def select_over_all_levels(self, boxlists):
    num_images = len(boxlists)
    # different behavior during training and during testing:
    # during training, post_nms_top_n is over *all* the proposals combined, while
    # during testing, it is over the proposals for each image
    # NOTE: it should be per image, and not per batch. However, to be consistent
    # with Detectron, the default is per batch (see Issue #672)
    if self.training and self.fpn_post_nms_per_batch:
        objectness = torch.cat(
            [boxlist.get_field("objectness") for boxlist in boxlists], dim=0)
        box_sizes = [len(boxlist) for boxlist in boxlists]
        post_nms_top_n = min(self.fpn_post_nms_top_n, len(objectness))
        _, inds_sorted = torch.topk(objectness, post_nms_top_n, dim=0, sorted=True)
        inds_mask = torch.zeros_like(objectness, dtype=torch.uint8)
        inds_mask[inds_sorted] = 1
        inds_mask = inds_mask.split(box_sizes)
        for i in range(num_images):
            boxlists[i] = boxlists[i][inds_mask[i]]
    else:
        for i in range(num_images):
            objectness = boxlists[i].get_field("objectness")
            if self.onnx_export:
                from torch.onnx import operators
                objectness_len = operators.shape_as_tensor(objectness)
                post_nms_top_n = torch.min(
                    torch.cat((torch.tensor([self.fpn_post_nms_top_n],
                                            dtype=torch.long),
                               objectness_len), 0))
            else:
                post_nms_top_n = min(self.fpn_post_nms_top_n, len(objectness))
            _, inds_sorted = torch.topk(objectness, post_nms_top_n, dim=0,
                                        sorted=True)
            boxlists[i] = boxlists[i][inds_sorted]
    return boxlists
def forward(self, images_tensors, images_image_sizes, features):
    features = list(features.values())
    objectness, pred_bbox_deltas = self.head(features)
    # number of anchors per feature level, before concatenation
    num_anchors_per_level = [o[0].numel() for o in objectness]

    from torch.onnx.operators import shape_as_tensor
    num_anchors_per_level_shape_tensors = [
        shape_as_tensor(o[0]) for o in objectness
    ]
    num_anchors_per_level_fixed = [
        s[0] * s[1] * s[2] for s in num_anchors_per_level_shape_tensors
    ]
    # print(num_anchors_per_level_shape_tensors)
    # num_anchors_per_level_fixed = [s.prod() for s in num_anchors_per_level_shape_tensors]
    # s.prod() exports as ReduceProd, for which the runtime "could not find an
    # implementation for the node ReduceProd(11)", hence the explicit s[0] * s[1] * s[2].
    # print(num_anchors_per_level_fixed)  # A list of tensors

    objectness, pred_bbox_deltas = concat_box_prediction_layers(
        objectness, pred_bbox_deltas)
    anchors = self.anchor_generator(images_tensors, images_image_sizes, features)

    proposals = self.box_coder.decode(pred_bbox_deltas.detach(), anchors)
    num_images = len(images_image_sizes)
    proposals = proposals.view(num_images, -1, 4)

    # PSX exporting debug
    boxes, scores = self.rpn.filter_proposals(proposals, objectness,
                                              images_image_sizes,
                                              num_anchors_per_level_fixed)
    return boxes
def _get_shape_onnx(image: Tensor) -> Tensor:
    from torch.onnx import operators
    return operators.shape_as_tensor(image)[-2:]
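# Hedged sketch, not from the original repository: contrasts the traced-shape
# helper above with a plain Python shape read. Under torch.onnx export,
# `_get_shape_onnx` keeps H and W symbolic in the graph, whereas reading
# `image.shape[-2:]` bakes the current values in as constants.
def _shape_helper_example():
    import torch
    image = torch.zeros(3, 480, 640)              # hypothetical CHW image
    hw_tensor = _get_shape_onnx(image)            # tensor([480, 640]), dynamic when traced
    hw_python = list(image.shape[-2:])            # [480, 640], constant-folded when traced
    return hw_tensor, hw_python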
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    # put in the same format as anchors
    objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
    objectness = objectness.sigmoid()
    box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

    num_anchors = A * H * W
    if self.onnx_export:
        from torch.onnx import operators
        num_anchors = operators.shape_as_tensor(objectness)[1].unsqueeze(0)
        pre_nms_top_n = torch.min(
            torch.cat((torch.tensor([self.pre_nms_top_n], dtype=torch.long),
                       num_anchors), 0))
    else:
        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)

    objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

    batch_idx = torch.arange(N, device=device)[:, None]
    if self.onnx_export:
        # NOTE: for now only batch == 1 is supported for ONNX export.
        assert topk_idx.size(0) == 1
        topk_idx = topk_idx.squeeze(0)
        box_regression = box_regression.index_select(1, topk_idx)
    else:
        box_regression = box_regression[batch_idx, topk_idx]

    image_shapes = [box.size for box in anchors]
    concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
    if self.onnx_export:
        concat_anchors = concat_anchors.reshape(N, -1, 4).index_select(1, topk_idx)
    else:
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

    proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                      concat_anchors.view(-1, 4))
    proposals = proposals.view(N, -1, 4)

    result = []
    for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
        boxlist = BoxList(proposal, im_shape, mode="xyxy")
        boxlist.add_field("objectness", score)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, self.min_size, self.onnx_export)
        boxlist = boxlist_nms(
            boxlist,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )
        result.append(boxlist)
    return result
def decode_batch_with_nms_trace(self,
                                bboxes_in,
                                scores_in,
                                criteria=0.45,
                                max_output=200,
                                device=0):
    # Needed for dynamic shapes during ONNX export tracing.
    from torch.onnx import operators

    bboxes, probs = self.scale_back_batch(bboxes_in, scores_in, device)
    torch.ops.load_library(
        os.path.join(os.path.dirname(__file__), 'lib',
                     'custom_ops.cpython-37m-x86_64-linux-gnu.so'))
    assert bboxes.size(0) == 1, 'batch size must be 1'
    bboxes = bboxes.squeeze(0)
    probs = probs.squeeze(0)

    # for each label
    bboxes_out = []
    scores_out = []
    labels_out = []
    # bboxes shape [box num, 4]
    # probs shape [box num, label num]
    for i in range(probs.size(1)):
        # skip background
        if i == 0:
            continue
        scores_per_label = probs[:, i]
        mask = scores_per_label > 0.05
        bboxes_masked, scores_masked = bboxes[mask, :], scores_per_label[mask]
        # print('decode single iter scores masked:', scores_masked, scores_masked.shape)
        num_selected = operators.shape_as_tensor(scores_masked)[0].unsqueeze(0)
        k = torch.min(
            torch.cat((torch.tensor([max_output], dtype=torch.long),
                       num_selected), 0))
        _, sorted_idx = scores_masked.topk(k, dim=0)
        bboxes_masked = bboxes_masked[sorted_idx]
        scores_masked = scores_masked[sorted_idx]
        out_idx = torch.ops.roi_ops.nms(bboxes_masked, scores_masked, criteria)
        bboxes_out.append(bboxes_masked[out_idx])
        scores_out.append(scores_masked[out_idx])
        labels_out.append(torch.full_like(out_idx, i, dtype=torch.long))
        # print('decode single iter output:', scores_out[-1], labels_out[-1])

    # return top max_output
    bboxes_out = torch.cat(bboxes_out, dim=0)
    labels_out = torch.cat(labels_out, dim=0)
    scores_out = torch.cat(scores_out, dim=0)
    num_selected = operators.shape_as_tensor(scores_out)[0].unsqueeze(0)
    k = torch.min(
        torch.cat((torch.tensor([max_output], dtype=torch.long),
                   num_selected), 0))
    _, max_ids = scores_out.topk(k, dim=0)
    return (bboxes_out[max_ids, :].unsqueeze(0),
            labels_out[max_ids].unsqueeze(0),
            scores_out[max_ids].unsqueeze(0))
def forward_for_single_feature_map(self, anchors, objectness, box_regression, cls):
    """
    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    ###
    # show heat map (debug visualization, kept disabled)
    ###
    # import matplotlib.pyplot as plt
    # import cv2
    # import numpy as np
    # img = cv2.imread("/home/w/workspace/onnx/maskrcnn-benchmark/demo/test_yolo.jpg")
    # img = cv2.resize(img, (416, 416))
    # img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # temp = objectness[:, 0].cpu()[0].numpy() * 255
    # temp = temp.astype(np.uint8)
    # temp = cv2.resize(temp, (416, 416))
    # img = cv2.addWeighted(img, 0.5, temp, 0.5, 1)
    #
    # plt.imshow(img)
    # plt.show()
    ###
    # show heat map end
    ###

    N, AXC, H, W = cls.shape
    C = int(AXC / A)

    # put in the same format as anchors
    objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
    objectness = objectness.sigmoid()
    cls = permute_and_flatten(cls, N, A, C, H, W)
    box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

    num_anchors = A * H * W
    if self.onnx_export:
        from torch.onnx import operators
        num_anchors = operators.shape_as_tensor(objectness)[1].unsqueeze(0)
        pre_nms_top_n = torch.min(
            torch.cat((torch.tensor([self.pre_nms_top_n], dtype=torch.long),
                       num_anchors), 0))
    else:
        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)

    objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

    batch_idx = torch.arange(N, device=device)[:, None]
    if self.onnx_export:
        # NOTE: for now only batch == 1 is supported for ONNX export.
        assert topk_idx.size(0) == 1
        topk_idx = topk_idx.squeeze(0)
        box_regression = box_regression.index_select(1, topk_idx)
    else:
        box_regression = box_regression[batch_idx, topk_idx]
    cls = cls[batch_idx, topk_idx]

    image_shapes = [box.size for box in anchors]
    concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
    if self.onnx_export:
        concat_anchors = concat_anchors.reshape(N, -1, 4).index_select(1, topk_idx)
    else:
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

    proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                      concat_anchors.view(-1, 4))
    proposals = proposals.view(N, -1, 4)
    cls = torch.argmax(cls, -1) + 1

    result = []
    for proposal, score, c, im_shape in zip(proposals, objectness, cls, image_shapes):
        boxlist = BoxList(proposal, im_shape, mode="xyxy")
        boxlist.add_field("scores", score)
        boxlist.add_field("labels", c)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, self.min_size, self.onnx_export)
        boxlist = boxlist_nms(
            boxlist,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="scores",
        )
        result.append(boxlist)
    return result
def forward(self, images, features, targets=None):
    # type: (ImageList, Dict[str, Tensor], Optional[List[Dict[str, Tensor]]])
    """
    Arguments:
        images (ImageList): images for which we want to compute the predictions
        features (List[Tensor]): features computed from the images that are
            used for computing the predictions. Each tensor in the list
            corresponds to a different feature level
        targets (List[Dict[Tensor]]): ground-truth boxes present in the image
            (optional). If provided, each element in the dict should contain a
            field `boxes`, with the locations of the ground-truth boxes.

    Returns:
        boxes (List[Tensor]): the predicted boxes from the RPN, one Tensor per
            image.
        losses (Dict[Tensor]): the losses for the model during training. During
            testing, it is an empty dict.
    """
    # RPN uses all feature maps that are available
    features = list(features.values())
    objectness, pred_bbox_deltas = self.head(features)
    anchors = self.anchor_generator(images, features)

    num_images = len(anchors)

    if torchvision._is_tracing():
        # For onnx export (Split in _get_top_n_idx)
        from torch.onnx.operators import shape_as_tensor
        num_anchors_per_level_shape_tensors = [
            shape_as_tensor(o[0]) for o in objectness
        ]
        num_anchors_per_level = [
            s[0] * s[1] * s[2] for s in num_anchors_per_level_shape_tensors
        ]
        # tensor.prod() would export as ReduceProd, which the current runtime
        # cannot run; the explicit product above is a naive workaround.
    else:
        num_anchors_per_level = [o[0].numel() for o in objectness]

    objectness, pred_bbox_deltas = \
        concat_box_prediction_layers(objectness, pred_bbox_deltas)
    # apply pred_bbox_deltas to anchors to obtain the decoded proposals
    # note that we detach the deltas because Faster R-CNN does not backprop
    # through the proposals
    proposals = self.box_coder.decode(pred_bbox_deltas.detach(), anchors)
    proposals = proposals.view(num_images, -1, 4)
    boxes, scores = self.filter_proposals(proposals, objectness,
                                          images.image_sizes,
                                          num_anchors_per_level)

    losses = {}
    if self.training:
        assert targets is not None
        labels, matched_gt_boxes = self.assign_targets_to_anchors(anchors, targets)
        regression_targets = self.box_coder.encode(matched_gt_boxes, anchors)
        loss_objectness, loss_rpn_box_reg = self.compute_loss(
            objectness, pred_bbox_deltas, labels, regression_targets)
        losses = {
            "loss_objectness": loss_objectness,
            "loss_rpn_box_reg": loss_rpn_box_reg,
        }
    return boxes, losses