def _forward_keypoint(self, features, instances):
    """
    Forward logic of the keypoint prediction branch.

    Args:
        features (list[Tensor]): #level input features for keypoint prediction
        instances (list[Instances]): the per-image instances to train/predict keypoints.
            In training, they can be the proposals.
            In inference, they can be the predicted boxes.

    Returns:
        In training, a dict of losses.
        In inference, update `instances` with new fields "pred_keypoints" and return it.
    """
    if not self.keypoint_on:
        return {} if self.training else instances

    num_images = len(instances)
    if self.training:
        # The loss is defined on positive proposals with >=1 visible keypoints.
        proposals, _ = select_foreground_proposals(instances, self.num_classes)
        proposals = select_proposals_with_visible_keypoints(proposals)
        proposal_boxes = [x.proposal_boxes for x in proposals]

        keypoint_features = self.keypoint_pooler(features, proposal_boxes)
        keypoint_logits = self.keypoint_head(keypoint_features)

        normalizer = (
            num_images
            * self.batch_size_per_image
            * self.positive_sample_fraction
            * keypoint_logits.shape[1]
        )
        if self.keypoint_loss_type == "FiberKeypointLoss":
            loss = fiber_keypoint_rcnn_loss(keypoint_logits, proposals)
        else:
            loss = keypoint_rcnn_loss(
                keypoint_logits,
                proposals,
                normalizer=None if self.normalize_loss_by_visible_keypoints else normalizer,
            )
        return {"loss_keypoint": loss * self.keypoint_loss_weight}
    else:
        pred_boxes = [x.pred_boxes for x in instances]
        keypoint_features = self.keypoint_pooler(features, pred_boxes)
        keypoint_logits = self.keypoint_head(keypoint_features)
        keypoint_rcnn_inference(keypoint_logits, instances)
        return instances
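# A worked example with hypothetical values (typical detectron2/COCO defaults) of
# the normalizer computed in `_forward_keypoint` above: it approximates the number
# of keypoint heatmaps the loss would span if every sampled positive proposal
# contributed all of its keypoints.
def _example_keypoint_loss_normalizer():
    num_images = 2                   # images per batch
    batch_size_per_image = 512       # RoIs sampled per image by the RoI head
    positive_sample_fraction = 0.25  # fraction of sampled RoIs kept as positives
    num_keypoints = 17               # keypoint_logits.shape[1] for COCO keypoints
    normalizer = (
        num_images * batch_size_per_image * positive_sample_fraction * num_keypoints
    )
    assert normalizer == 4352.0
    return normalizer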
def forward_dp_keypoint(self, features, instances):
    """
    Forward logic of the DensePose keypoint branch. Unlike `_forward_keypoint`,
    keypoint features are pooled and processed by the DensePose pooler/head
    before being fed to the keypoint predictor.
    """
    if not self.dp_keypoint_on:
        return {} if self.training else instances

    num_images = len(instances)
    if self.training:
        # The loss is defined on positive proposals with >=1 visible keypoints.
        proposals, _ = select_foreground_proposals(instances, self.num_classes)
        proposals = select_proposals_with_visible_keypoints(proposals)
        proposal_boxes = [x.proposal_boxes for x in proposals]

        if self.use_mid:
            # The mid-level decoder produces a single feature map; wrap it in a
            # list to match the pooler's list-of-levels interface.
            features = [self.mid_decoder(features)]
        keypoint_features = self.densepose_pooler(features, proposal_boxes)
        keypoint_output = self.densepose_head(keypoint_features)
        keypoint_logits = self.keypoint_predictor(keypoint_output)

        normalizer = (
            num_images
            * self.batch_size_per_image
            * self.positive_sample_fraction
            * keypoint_logits.shape[1]
        )
        loss = keypoint_rcnn_loss(
            keypoint_logits,
            proposals,
            normalizer=None if self.normalize_loss_by_visible_keypoints else normalizer,
        )
        return {"loss_keypoint": loss * self.keypoint_loss_weight}
    else:
        if self.use_mid:
            features = [self.mid_decoder(features)]
        pred_boxes = [x.pred_boxes for x in instances]
        keypoint_features = self.densepose_pooler(features, pred_boxes)
        keypoint_output = self.densepose_head(keypoint_features)
        keypoint_logits = self.keypoint_predictor(keypoint_output)
        keypoint_rcnn_inference(keypoint_logits, instances)
        return instances
def _forward_dp_keypoint(self, keypoint_logits, instances):
    """
    Same as `forward_dp_keypoint`, but takes precomputed keypoint logits
    instead of pooling features and running the DensePose head internally.
    """
    if not self.dp_keypoint_on:
        return {} if self.training else instances

    num_images = len(instances)
    if self.training:
        # The loss is defined on positive proposals with >=1 visible keypoints.
        normalizer = (
            num_images
            * self.batch_size_per_image
            * self.positive_sample_fraction
            * keypoint_logits.shape[1]
        )
        loss = dp_keypoint_rcnn_loss(
            keypoint_logits,
            instances,
            normalizer=None if self.normalize_loss_by_visible_keypoints else normalizer,
        )
        return {"loss_keypoint": loss * self.keypoint_loss_weight}
    else:
        keypoint_rcnn_inference(keypoint_logits, instances)
        return instances
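# A minimal, hypothetical sketch of the inference path used above, assuming
# detectron2's standard `keypoint_rcnn_inference` semantics: per-RoI heatmaps of
# shape (N, K, S, S) become per-instance (x, y, score) keypoints of shape (N, K, 3),
# written in-place onto each Instances as "pred_keypoints".
def _example_keypoint_inference():
    import torch
    from detectron2.structures import Boxes, Instances

    inst = Instances((480, 640))  # (H, W) of the image
    inst.pred_boxes = Boxes(torch.tensor([[10.0, 20.0, 50.0, 80.0]]))
    dummy_logits = torch.randn(1, 17, 56, 56)  # one box, 17 keypoints, 56x56 heatmaps
    keypoint_rcnn_inference(dummy_logits, [inst])
    assert inst.pred_keypoints.shape == (1, 17, 3)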
def assemble_rcnn_outputs_by_name(image_sizes, tensor_outputs, force_mask_on=False):
    """
    A function to assemble a caffe2 model's outputs (i.e. Dict[str, Tensor])
    into detectron2's format (i.e. list of Instances).
    This only works when the model follows Caffe2 Detectron's naming convention.

    Args:
        image_sizes (List[List[int, int]]): [H, W] of every image.
        tensor_outputs (Dict[str, Tensor]): external_output name to its tensor.
        force_mask_on (Bool): if true, make sure there will be pred_masks even
            if the mask is not found in tensor_outputs (usually due to model crash).
    """
    results = [Instances(image_size) for image_size in image_sizes]

    batch_splits = tensor_outputs.get("batch_splits", None)
    if batch_splits is not None:
        raise NotImplementedError()
    assert len(image_sizes) == 1
    result = results[0]

    bbox_nms = tensor_outputs["bbox_nms"]
    score_nms = tensor_outputs["score_nms"]
    class_nms = tensor_outputs["class_nms"]
    # Detection will always succeed because Conv supports 0-batch.
    assert bbox_nms is not None
    assert score_nms is not None
    assert class_nms is not None
    if bbox_nms.shape[1] == 5:
        result.pred_boxes = RotatedBoxes(bbox_nms)
    else:
        result.pred_boxes = Boxes(bbox_nms)
    result.scores = score_nms
    result.pred_classes = class_nms.to(torch.int64)

    mask_fcn_probs = tensor_outputs.get("mask_fcn_probs", None)
    if mask_fcn_probs is not None:
        # finish the mask prediction: keep only the channel of the predicted class
        mask_probs_pred = mask_fcn_probs
        num_masks = mask_probs_pred.shape[0]
        class_pred = result.pred_classes
        indices = torch.arange(num_masks, device=class_pred.device)
        mask_probs_pred = mask_probs_pred[indices, class_pred][:, None]
        result.pred_masks = mask_probs_pred
    elif force_mask_on:
        # NOTE: there's no way to know the height/width of the mask here; it won't
        # be used anyway when the batch size is 0, so just set them to 0.
        result.pred_masks = torch.zeros([0, 1, 0, 0], dtype=torch.uint8)

    keypoints_out = tensor_outputs.get("keypoints_out", None)
    kps_score = tensor_outputs.get("kps_score", None)
    if keypoints_out is not None:
        # keypoints_out: [N, 4, #keypoints], where 4 is in the order (x, y, score, prob)
        keypoints_tensor = keypoints_out
        # NOTE: it's possible that prob is not calculated if "should_output_softmax"
        # is set to False in HeatmapMaxKeypoint, so just use the raw score; it seems
        # not to affect mAP. TODO: check more carefully.
        keypoint_xyp = keypoints_tensor.transpose(1, 2)[:, :, [0, 1, 2]]
        result.pred_keypoints = keypoint_xyp
    elif kps_score is not None:
        # convert the keypoint heatmap to a sparse data structure
        pred_keypoint_logits = kps_score
        keypoint_head.keypoint_rcnn_inference(pred_keypoint_logits, [result])

    return results
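# Hypothetical usage sketch of `assemble_rcnn_outputs_by_name`, assuming a single
# 480x640 image and a Caffe2-style output dict; the tensor values are dummies.
def _example_assemble_outputs():
    import torch

    dummy_outputs = {
        "bbox_nms": torch.tensor([[10.0, 20.0, 50.0, 80.0]]),  # one (x1, y1, x2, y2) box
        "score_nms": torch.tensor([0.9]),
        "class_nms": torch.tensor([0.0]),
    }
    (result,) = assemble_rcnn_outputs_by_name(
        image_sizes=[[480, 640]], tensor_outputs=dummy_outputs
    )
    # result.pred_boxes is a Boxes of shape (1, 4); result.pred_classes is int64.
    return result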