import torch

from detectron2.layers import batched_nms
from detectron2.structures import Boxes, Instances


def fast_rcnn_inference_single_image(boxes, scores, attr_scores, image_shape,
                                     score_thresh, nms_thresh, topk_per_image):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    # Make sure boxes, scores, and attribute scores don't contain Inf or NaN values.
    valid_mask = (torch.isfinite(boxes).all(dim=1)
                  & torch.isfinite(scores).all(dim=1)
                  & torch.isfinite(attr_scores).all(dim=1))

    # Keep only the predictions whose boxes and scores are all finite
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        attr_scores = attr_scores[valid_mask]

    scores = scores[:, :-1]  # Drop the last column, which holds the background-class score
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Alternative if using an `Attributes` structure (295 = number of attribute classes):
    # attributes = Attributes(attributes.reshape(-1, 295))
    # attributes = attributes.tensor.view(-1, num_bbox_reg_classes, 295)

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()

    if num_bbox_reg_classes == 1:
        # Class-agnostic box regression: a single box per proposal
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        # Per-class regression: keep the box predicted for each surviving class
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]
    # Align attribute scores with the surviving (proposal, class) pairs so that
    # indexing with `keep` below stays consistent; they are otherwise never filtered
    attr_scores = attr_scores[filter_inds[:, 0]]

    # Apply per-class NMS
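    # `batched_nms` runs NMS independently per class (typically by offsetting the
    # boxes by their class index), so detections of different classes never
    # suppress one another.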
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, attr_scores, filter_inds = (
        boxes[keep], scores[keep], attr_scores[keep], filter_inds[keep])

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.attr_scores = attr_scores
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
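

# A minimal usage sketch (added for illustration; not part of the original
# source). The shapes, class counts, and thresholds below are assumptions
# modelled on a detectron2-style Fast R-CNN head with 46 object classes
# (plus background) and 295 attribute classes; `_demo_fast_rcnn_inference`
# is a hypothetical helper.
def _demo_fast_rcnn_inference():
    num_proposals, num_classes, num_attrs = 1000, 46, 295
    # Well-formed boxes with x2 > x1 and y2 > y1, one box per (proposal, class)
    xy = torch.rand(num_proposals, num_classes, 2) * 400
    wh = torch.rand(num_proposals, num_classes, 2) * 100
    boxes = torch.cat([xy, xy + wh], dim=2).reshape(num_proposals, num_classes * 4)
    scores = torch.rand(num_proposals, num_classes + 1)  # last column = background
    attr_scores = torch.rand(num_proposals, num_attrs)
    result, kept = fast_rcnn_inference_single_image(
        boxes, scores, attr_scores, image_shape=(480, 640),
        score_thresh=0.05, nms_thresh=0.5, topk_per_image=100)
    print(len(result), result.pred_boxes.tensor.shape, result.attr_scores.shape)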


# Example #2

def trend_rcnn_inference_single_image(boxes, scores, attributes, image_shape,
                                      score_thresh, nms_thresh, topk_per_image,
                                      attr_score_thresh, num_attr_classes,
                                      max_attr_pred):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    # Make sure boxes and scores don't contain Inf or NaN values
    valid_mask = (torch.isfinite(boxes).all(dim=1)
                  & torch.isfinite(scores).all(dim=1))
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        attributes = attributes[valid_mask]

    scores = scores[:, :-1]  # Drop the background-class column
    num_bbox_reg_classes = boxes.shape[1] // 4
    #print("Printing the number of classes in the box: ", num_bbox_reg_classes)
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    num_attr_reg_classes = attributes.shape[1] // num_attr_classes
    # Number of attribute regression classes; this equals the number of object
    # classes (46 in the author's setup) unless attribute prediction is
    # class-agnostic, in which case it is 1.
    attributes = attributes.view(-1, num_attr_reg_classes, num_attr_classes)
    # Reshape to [num. proposals, num. attr reg. classes, num. attribute classes]

    # Filter results based on detection scores. `filter_mask` has the same
    # shape as `scores`: [num. proposals, num. object classes] (R x K).
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    # R' can be much larger than R, because each proposal may clear the score
    # threshold for several classes (e.g. R = 1000 but R' = 45806 in one run).
    # Class-agnostic attribute classification may struggle in that case, since
    # a single proposal then contributes the same attributes under every class.

    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]
    # `scores` goes from shape [R, num. classes] to a flat [R'] vector here

    if num_attr_reg_classes == 1:
        attributes = attributes[filter_inds[:, 0], 0]
    else:
        attributes = attributes[filter_mask]
    # Both branches produce attributes of shape [R', num. attribute classes]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds, attributes = (
        boxes[keep], scores[keep], filter_inds[keep], attributes[keep])

    # Zero out attribute scores below the threshold, then sort each row in
    # descending order and keep the indices of the top `max_attr_pred` attributes
    attributes[attributes < attr_score_thresh] = 0
    attr_scores_sorted, attr_indices = torch.sort(attributes, 1, descending=True)
    # Map low-scoring entries to the last attribute index, assumed to be the
    # "no attribute" class (num_attr_classes - 1, i.e. 294 when there are 295 classes)
    attr_indices[attr_scores_sorted < attr_score_thresh] = num_attr_classes - 1
    attributes_inds = attr_indices[:, 0:max_attr_pred]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.attr_scores = attributes
    result.attr_classes = attributes_inds
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
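

# A minimal usage sketch (added for illustration; not part of the original
# source). Shapes and thresholds are assumptions; attribute prediction is
# taken to be class-agnostic here (num_attr_reg_classes == 1), and
# `_demo_trend_rcnn_inference` is a hypothetical helper.
def _demo_trend_rcnn_inference():
    num_proposals, num_classes, num_attrs = 1000, 46, 295
    xy = torch.rand(num_proposals, num_classes, 2) * 400
    wh = torch.rand(num_proposals, num_classes, 2) * 100
    boxes = torch.cat([xy, xy + wh], dim=2).reshape(num_proposals, num_classes * 4)
    scores = torch.rand(num_proposals, num_classes + 1)
    attributes = torch.rand(num_proposals, num_attrs)  # one attr vector per proposal
    result, kept = trend_rcnn_inference_single_image(
        boxes, scores, attributes, image_shape=(480, 640),
        score_thresh=0.05, nms_thresh=0.5, topk_per_image=100,
        attr_score_thresh=0.3, num_attr_classes=num_attrs, max_attr_pred=5)
    print(result.attr_classes.shape)  # -> [num. detections, max_attr_pred]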