def fast_rcnn_inference_single_image(boxes, scores, attr_scores, image_shape,
                                     score_thresh, nms_thresh, topk_per_image):
    """
    Single-image inference. Return bounding-box detection results by
    thresholding on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, attribute
        scores, and image shapes per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    # Make sure boxes, scores, and attr_scores don't contain infinite or NaN
    # values; drop any proposal that does.
    valid_mask = (torch.isfinite(boxes).all(dim=1)
                  & torch.isfinite(scores).all(dim=1)
                  & torch.isfinite(attr_scores).all(dim=1))
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        attr_scores = attr_scores[valid_mask]

    # The last score column is the background class; drop it.
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # 1. Filter results based on detection scores.
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # second column contains indices of classes. Note R' may exceed R:
    # one proposal can pass the threshold for several classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]  # [R, K] -> [R']
    # BUGFIX: expand attr_scores into the filtered R'-space so that indexing
    # with `keep` below selects attributes for the SAME proposals as
    # `boxes`/`scores`. The original indexed the unfiltered R-row tensor with
    # R'-space indices, pairing detections with the wrong attribute rows (and
    # indexing out of range whenever R' > R).
    attr_scores = attr_scores[filter_inds[:, 0]]

    # 2. Apply per-class NMS and keep at most `topk_per_image` detections
    # (a negative topk_per_image means "keep everything").
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes = boxes[keep]
    scores = scores[keep]
    attr_scores = attr_scores[keep]
    filter_inds = filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.attr_scores = attr_scores
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
def trend_rcnn_inference_single_image(boxes, scores, attributes, image_shape,
                                      score_thresh, nms_thresh, topk_per_image,
                                      attr_score_thresh, num_attr_classes,
                                      max_attr_pred):
    """
    Single-image inference. Return bounding-box detection results by
    thresholding on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, attributes,
        and image shapes per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    # Drop proposals containing Inf/NaN values. Also check `attributes`, for
    # consistency with `fast_rcnn_inference_single_image`, which validates its
    # attribute tensor as well.
    valid_mask = (torch.isfinite(boxes).all(dim=1)
                  & torch.isfinite(scores).all(dim=1)
                  & torch.isfinite(attributes).all(dim=1))
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        attributes = attributes[valid_mask]

    # The last score column is the background class; drop it.
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Number of object classes the attribute head predicts for
    # (1 when the attribute head is class-agnostic).
    num_attr_reg_classes = attributes.shape[1] // num_attr_classes
    # Reshape to [proposals, object classes, attribute classes].
    attributes = attributes.view(-1, num_attr_reg_classes, num_attr_classes)

    # 1. Filter results based on detection scores.
    filter_mask = scores > score_thresh  # R x K, same shape as `scores`
    # R' x 2. First column contains indices of the R predictions;
    # second column contains indices of classes. Note R' may exceed R:
    # one proposal can pass the threshold for several classes
    # (e.g. R=1000 but R'=45806).
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]  # [R, K] -> [R']
    if num_attr_reg_classes == 1:
        attributes = attributes[filter_inds[:, 0], 0]
    else:
        attributes = attributes[filter_mask]
    # Either branch yields attributes of shape [R', num_attr_classes].

    # 2. Apply per-class NMS and keep at most `topk_per_image` detections
    # (a negative topk_per_image means "keep everything").
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds, attributes = (
        boxes[keep], scores[keep], filter_inds[keep], attributes[keep])

    # Zero out low-confidence attribute scores, then keep the indices of the
    # top `max_attr_pred` attribute classes per detection. Sub-threshold slots
    # are mapped to the last attribute index (presumably the "no attribute"
    # class — previously the hard-coded 294, which silently assumed
    # num_attr_classes == 295; `num_attr_classes - 1` generalizes this while
    # producing the identical value for the 295-class configuration).
    attributes[attributes < attr_score_thresh] = 0
    attr_scores_sorted, attr_indices = torch.sort(attributes, 1,
                                                  descending=True)
    attr_indices[attr_scores_sorted < attr_score_thresh] = num_attr_classes - 1
    attributes_inds = attr_indices[:, 0:max_attr_pred]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.attr_scores = attributes
    result.attr_classes = attributes_inds
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]