def fast_rcnn_inference_single_image(boxes,
                                     scores,
                                     image_shape,
                                     score_thresh,
                                     nms_thresh,
                                     topk_per_image,
                                     light=None):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    if isinstance(light, torch.Tensor):
        light = Boxes(light.reshape(-1, 4))
        # Note: unlike `boxes`, `light` is deliberately left unclipped here.
        light = light.tensor.view(-1, num_bbox_reg_classes, 4)

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
        if isinstance(light, torch.Tensor):
            light = light[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
        if isinstance(light, torch.Tensor):
            light = light[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]
    if isinstance(light, torch.Tensor):
        light = light[keep]
    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    if isinstance(light, torch.Tensor):
        result.pred_light = Boxes(light)
    return result, filter_inds[:, 0]
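All of the examples below assume the detectron2 runtime. A minimal import block they share might look like the following (an assumption, since each snippet omits its file header; repo-specific helpers such as batched_noh_nms, anglecorrection, or nms_calc_uncertainty are not part of detectron2):

import math
from typing import List, Tuple

import numpy as np
import torch
from torchvision.ops import nms

from detectron2.layers import batched_nms
from detectron2.structures import Boxes, Instances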
Example #2
    def _get_class_predictions(self, boxes, scores, image_shape):

        num_bbox_reg_classes = boxes.shape[1] // 4

        # Convert to Boxes to use the `clip` function ...
        boxes = Boxes(boxes.reshape(-1, 4))
        boxes.clip(image_shape)
        boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

        # Filter results based on detection scores
        filter_mask = scores > self.class_score_thresh_test

        # R' x 2. First column contains indices of the R predictions;
        # Second column contains indices of classes.
        class_inds = filter_mask.nonzero()
        if num_bbox_reg_classes == 1:
            boxes = boxes[class_inds[:, 0], 0]
        else:
            boxes = boxes[filter_mask]
        scores = scores[filter_mask]

        # Apply per-class NMS
        keep_class = batched_nms(boxes, scores, class_inds[:, 1],
                                 self.class_nms_thresh_test)
        if self.topk_per_image_test >= 0:
            keep_class = keep_class[:self.topk_per_image_test]

        boxes = boxes[keep_class]
        scores = scores[keep_class]
        class_inds = class_inds[keep_class]

        return boxes, scores, class_inds
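To make the "R' x 2" comment above concrete, here is a tiny worked example (not from the original repo) of how `nonzero()` pairs prediction rows with class columns:

import torch

scores = torch.tensor([[0.9, 0.1, 0.7],
                       [0.2, 0.8, 0.3]])
filter_mask = scores > 0.5
inds = filter_mask.nonzero()
# tensor([[0, 0],    -> prediction 0, class 0
#         [0, 2],    -> prediction 0, class 2
#         [1, 1]])   -> prediction 1, class 1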
Example #3
def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh,
                                     nms_thresh, topk_per_image):
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Select max scores
    max_scores, max_classes = scores.max(1)  # R x C --> R
    num_objs = boxes.size(0)
    boxes = boxes.view(-1, 4)
    idxs = torch.arange(num_objs, device=boxes.device) * num_bbox_reg_classes + max_classes
    max_boxes = boxes[idxs]  # Select max boxes according to the max scores.

    # Apply NMS
    keep = nms(max_boxes, max_scores, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores = max_boxes[keep], max_scores[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = max_classes[keep]

    return result, keep
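This variant keeps only the top-scoring class per box and runs class-agnostic `nms`. The per-class `batched_nms` used in the other examples can itself be emulated with plain `nms` by offsetting each class's boxes into a disjoint region of the plane; a minimal sketch of that idea (torchvision's implementation works along these lines):

import torch
from torchvision.ops import nms

def batched_nms_sketch(boxes, scores, idxs, iou_threshold):
    if boxes.numel() == 0:
        return torch.empty((0,), dtype=torch.int64, device=boxes.device)
    # Shift boxes by a per-class offset so boxes of different classes
    # can never overlap, then run ordinary NMS once.
    max_coordinate = boxes.max()
    offsets = idxs.to(boxes) * (max_coordinate + 1)
    return nms(boxes + offsets[:, None], scores, iou_threshold)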
Example #4
    def fast_rcnn_inference_single_image(boxes, scores, image_shape,
                                         score_thresh, nms_thresh,
                                         topk_per_image, box_features):
        valid_mask = torch.isfinite(boxes).all(
            dim=1) & torch.isfinite(scores).all(dim=1)
        if not valid_mask.all():
            boxes = boxes[valid_mask]
            scores = scores[valid_mask]
            box_features = box_features[valid_mask]

        scores = scores[:, :-1]
        num_bbox_reg_classes = boxes.shape[1] // 4
        # Convert to Boxes to use the `clip` function ...
        boxes = Boxes(boxes.reshape(-1, 4))
        boxes.clip(image_shape)
        boxes = boxes.tensor.view(-1, 4)  # (R * C) x 4
        max_conf = torch.zeros(boxes.shape[0], device=boxes.device)
        for cls_ind in range(0, scores.shape[1]):
            cls_scores = scores[:, cls_ind]
            # dets = torch.cat([boxes, cls_scores.view(-1, 1)], 1)
            keep = nms(boxes, cls_scores, 0.3)
            max_conf[keep] = torch.where(cls_scores[keep] > max_conf[keep],
                                         cls_scores[keep], max_conf[keep])
        # Keep a fixed budget of 36 regions (the common bottom-up-attention
        # setting): pad with the highest-confidence boxes if too few survive.
        keep_boxes = torch.where(max_conf >= 0.2)[0]
        if len(keep_boxes) < 36:
            keep_boxes = torch.argsort(max_conf, descending=True)[:36]
        elif len(keep_boxes) > 36:
            keep_boxes = keep_boxes[:36]
        boxes, scores = boxes[keep_boxes], scores[keep_boxes]
        box_features = box_features[keep_boxes]
        result = Instances(image_shape)
        result.pred_boxes = Boxes(boxes)
        result.scores = scores
        result.pred_classes = keep_boxes  # NOTE: stores kept box indices, not class labels
        return result, keep_boxes, box_features
Example #5
def fast_rcnn_inference_single_image(boxes, scores, attr_scores, image_shape,
                                     score_thresh, nms_thresh, topk_per_image):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    # Make sure boxes and scores don't contain infinite or NaN values
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1) \
                                                  & torch.isfinite(attr_scores).all(dim=1)

    # Keep only the rows where boxes, scores, and attr_scores are all finite
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        attr_scores = attr_scores[valid_mask]

    scores = scores[:, :-1]  # drop the background column (last class)
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # If using Attributes class:
    # attributes = Attributes(attributes.reshape(-1, 295))
    # attributes = attributes.tensor.view(-1, num_bbox_reg_classes, 295)

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()

    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]
    # attr_scores is still indexed per proposal (length R), so map it through
    # the kept prediction indices rather than indexing with `keep` directly.
    attr_scores = attr_scores[filter_inds[:, 0]]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.attr_scores = attr_scores
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
Example #6
def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh,
                                     nms_thresh, topk_per_image):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    # Keep the full per-class score matrix; it is attached to the result below.
    full_scores = scores
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply class-agnostic NMS: every detection is given the same "class" id,
    # so overlapping boxes suppress each other across classes.
    uniclass = torch.zeros_like(filter_inds[:, 1])
    keep = batched_nms(boxes, scores, uniclass, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]
    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    # Report the full (pre-threshold) class distribution for each kept prediction
    result.scores = full_scores[filter_inds[:, 0]]
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
Example #7
def fast_rcnn_inference_single_image(
    boxes,
    scores,
    image_shape: Tuple[int, int],
    score_thresh: float,
    nms_thresh: float,
    topk_per_image: int,
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)

    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    if len(category_disabler.global_cat_mask) > 0:
        print('<<<<<< category disabler activated >>>>>>')
        scores *= torch.tensor(category_disabler.global_cat_mask, device=scores.device)
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # 1. Filter results based on detection scores. It can make NMS more efficient
    #    by filtering out low-confidence detections.
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # 2. Apply NMS for each class independently.
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
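The `category_disabler` above is an external module-level switch whose definition is not shown; the following is only a guessed sketch of how such a mask could be maintained (all names here are hypothetical):

class CategoryDisabler:
    def __init__(self):
        self.global_cat_mask = []  # empty list means "nothing disabled"

    def disable(self, class_ids, num_classes=80):
        # 1.0 keeps a class's scores; 0.0 zeroes them out before thresholding
        self.global_cat_mask = [0.0 if i in set(class_ids) else 1.0
                                for i in range(num_classes)]

category_disabler = CategoryDisabler()
category_disabler.disable({0, 2})  # e.g. suppress classes 0 and 2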
Example #8
def fast_rcnn_inference_single_image(boxes,
                                     scores,
                                     image_shape,
                                     score_thresh,
                                     nms_thresh,
                                     topk_per_image,
                                     allow_oob=False):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    if not allow_oob:
        boxes = Boxes(boxes.reshape(-1, 4))
        boxes.clip(image_shape)
        boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4
    else:
        boxes = boxes.view(-1, num_bbox_reg_classes, 4)

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply class-agnostic NMS (note: plain `nms`, not per-class `batched_nms`)
    from torchvision.ops import nms

    keep = nms(boxes, scores, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
Example #9
def fast_rcnn_inference_single_image(
    boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # 1. Filter results based on detection scores. It can make NMS more efficient
    #    by filtering out low-confidence detections.
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # 2. Apply NMS for each class independently.
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    # DIOU NMS commented for now
    # keep = batched_diou_nms(boxes, scores, filter_inds[:, 1], nms_thresh) \
    #        if global_cfg.MODEL.ROI_BOX_HEAD.NMS_TYPE == "diou_nms" \
    #        else \
    #        batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)

    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
Example #10
def fast_rcnn_inference_single_image(
    boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image, fc_box_features=None,
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    probs = scores.clone().detach()
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]

    # Pack all FC-layer features into a single tensor so they fit the Instances container
    if fc_box_features is not None:
        fc_box_features = [fc_layer_box_features[filter_inds[:, 0]] for fc_layer_box_features in fc_box_features]
        # Unpacking later requires knowing the number of layers and their widths
        fc_box_features = torch.cat(fc_box_features, dim=1)
        result.fc_box_features = fc_box_features

    probs = probs[filter_inds[:, 0]]
    result.probs = probs

    return result, filter_inds[:, 0]
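Since the comments note that unpacking requires the layer count and widths, here is a sketch of the reverse operation (assuming, for illustration only, the default detectron2 box head with two 1024-wide FC layers):

import torch

def unpack_fc_box_features(packed, widths=(1024, 1024)):
    # Inverse of the torch.cat above: split (N, sum(widths)) back into one
    # (N, width) tensor per FC layer.
    return list(torch.split(packed, list(widths), dim=1))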
Example #11
    def regress_and_classify(self, image: np.ndarray, tracklets: List[Tracklet]) -> Tuple[np.ndarray, np.ndarray]:
        # Convert boxes to proposals
        height, width = image.shape[:2]
        image = self.transform_gen.get_transform(image).apply_image(image)
        image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
        # Size of the resized input image that the detector actually sees
        feat_height, feat_width = image.shape[1:3]
        scale_x = feat_width / width
        scale_y = feat_height / height
        proposal_boxes = Boxes(torch.tensor([tracklet.last_detection.box for tracklet in tracklets]))

        # Scale proposals from original-image coordinates to the resized image
        proposal_boxes.scale(scale_x, scale_y)
        proposals = Instances((feat_height, feat_width), proposal_boxes=proposal_boxes)

        inputs = {"image": image, "height": height, "width": width, "proposals": proposals}

        images = self.model.preprocess_image([inputs])
        features = self.model.backbone(images.tensor)
        proposals = [inputs["proposals"].to(self.model.device)]

        # Extract features, perform RoI pooling and perform regression/classification for each RoI
        features_list = [features[f] for f in self.model.roi_heads.in_features]

        box_features = self.model.roi_heads.box_pooler(features_list, [x.proposal_boxes for x in proposals])
        box_features = self.model.roi_heads.box_head(box_features)
        pred_class_logits, pred_proposal_deltas = self.model.roi_heads.box_predictor(box_features)
        del box_features

        raw_outputs = FastRCNNOutputs(
            self.model.roi_heads.box_predictor.box2box_transform,
            pred_class_logits,
            pred_proposal_deltas,
            proposals,
            self.model.roi_heads.box_predictor.smooth_l1_beta,
        )

        # Convert raw outputs to predicted boxes and scores
        boxes = raw_outputs.predict_boxes()[0]
        scores = raw_outputs.predict_probs()[0]

        num_bbox_reg_classes = boxes.shape[1] // 4
        boxes = Boxes(boxes.reshape(-1, 4))
        # Clip to the resized image, then scale back to original-image coordinates
        boxes.clip((feat_height, feat_width))
        boxes.scale(1 / scale_x, 1 / scale_y)
        boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)
        # Keep only the first class's regression and score (single-class tracker)
        boxes = boxes[:, 0, :]
        scores = scores[:, 0]

        pred_boxes = boxes.detach().cpu().numpy()
        scores = scores.detach().cpu().numpy()
        return pred_boxes, scores
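A hypothetical usage sketch for the method above (the wrapper object and the Tracklet fields are assumptions inferred from the method body, not a documented API):

import numpy as np

frame = np.zeros((480, 640, 3), dtype=np.uint8)  # dummy H x W x 3 frame
pred_boxes, scores = tracker.regress_and_classify(frame, tracklets)
# pred_boxes: (N, 4) regressed boxes in original-image coordinates
# scores:     (N,)   first-class confidences, one per input tracklet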
Example #12
def fast_rcnn_inference_single_image(image_shape,
                                     boxes,
                                     scores,
                                     classes=None,
                                     score_thresh=0.05,
                                     nms_thresh=0.5,
                                     topk_per_image=1000):
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)
    replace_cls = classes is not None
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    if replace_cls:
        classes = classes[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    if replace_cls:
        result.pred_classes = classes[keep]
    else:
        result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
Example #13
def fast_rcnn_inference_single_image(
        boxes, scores, image_shape, nms_thresh, topk_per_image
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    class_distr_scores = scores.clone()
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Select max scores
    max_scores, max_classes = scores.max(1)  # R x C --> R
    num_objs = boxes.size(0)
    boxes = boxes.view(-1, 4)
    num_objs = torch.arange(num_objs)
    if torch.cuda.is_available():
        num_objs = num_objs.cuda()
    idxs = num_objs * num_bbox_reg_classes + max_classes
    max_boxes = boxes[idxs]  # Select max boxes according to the max scores.

    # Apply NMS
    keep = nms(max_boxes, max_scores, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores = max_boxes[keep], max_scores[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    class_distr_scores = class_distr_scores[keep]
    # we set the background probability to 0
    class_distr_scores[:, -1] = 0.0
    result.scores = class_distr_scores

    return result, keep
Example #14
def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh,
                                     nms_thresh, topk_per_image):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """

    all_scores = scores.clone()
    all_scores = torch.unsqueeze(all_scores, 0)
    all_boxes = boxes.clone()
    all_boxes = torch.unsqueeze(all_boxes, 0)

    pred_inds = torch.arange(
        scores.size(0), device=scores.device, dtype=torch.long
    ).unsqueeze(1).repeat(1, scores.size(1))

    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        pred_inds = pred_inds[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4
    pred_inds = pred_inds[:, :-1]

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]
    pred_inds = pred_inds[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]
    pred_inds = pred_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    result.pred_inds = pred_inds
    return result, filter_inds[:, 0], all_scores, all_boxes
Example #15
def eopsn_inference_single_image(
    boxes, scores, image_shape, objness_scores, score_thresh, nms_thresh, topk_per_image,
    use_unknown=False, num_classes=80, reverse_label_converter=None
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)

    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        objness_scores = objness_scores[valid_mask]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4
    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K

    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Map labels beyond the converter's range to the unknown class (-1)
    classes = filter_inds[:, -1]
    if reverse_label_converter is not None:
        classes[classes > len(reverse_label_converter) - 1] = -1
        filter_inds[:, -1] = classes

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    classes = filter_inds[:,-1]
    if reverse_label_converter is not None:
        classes = reverse_label_converter.to(classes.device)[classes]

    # `keep` was already truncated to topk_per_image above; re-slicing with a
    # negative topk_per_image would wrongly drop the last detection.
    inds = filter_inds[:, 0]
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = classes
    return result, inds
Example #16
def fast_rcnn_inference_single_image_with_anchor(proposals, boxes, scores,
                                                 image_shape, score_thresh,
                                                 nms_thresh, topk_per_image):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """

    anchors = proposals.get_fields()['anchor_boxes'].tensor
    proposals = proposals.get_fields()['proposal_boxes'].tensor
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        anchors = anchors[valid_mask]
        proposals = proposals[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    anchors = Boxes(anchors)
    proposals = Boxes(proposals)
    anchors.clip(image_shape)
    proposals.clip(image_shape)
    anchors = anchors.tensor
    proposals = proposals.tensor
    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]
    anchors = anchors[filter_inds[:, 0]]
    proposals = proposals[filter_inds[:, 0]]
    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]
    anchors, proposals = anchors[keep], proposals[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    result.anchors = Boxes(anchors)
    result.proposals = Boxes(proposals)
    return result, filter_inds[:, 0]
Example #17
def trend_rcnn_inference_single_image(boxes, scores, attributes, image_shape,
                                      score_thresh, nms_thresh, topk_per_image,
                                      attr_score_thresh, num_attr_classes,
                                      max_attr_pred):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        attributes = attributes[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    num_attr_reg_classes = attributes.shape[1] // num_attr_classes
    # Number of object classes the attribute head regresses over (46 here)
    attributes = attributes.view(-1, num_attr_reg_classes, num_attr_classes)
    # attributes reshaped to [proposals, object classes, attribute classes]

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # filter_mask has the same shape as scores: [proposals, object classes]
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    # R' can exceed R, since several class scores per proposal may pass the
    # threshold (e.g. R=1000 proposals yielding R'=45806 entries); a
    # class-agnostic attribute classifier might fail in that regime.

    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]
    # scores: [R, num_classes] -> flat [R'] after masking

    if num_attr_reg_classes == 1:
        attributes = attributes[filter_inds[:, 0], 0]
    else:
        attributes = attributes[filter_mask]
    # Either branch yields attributes of shape [R', num_attr_classes]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds, attributes = boxes[keep], scores[
        keep], filter_inds[keep], attributes[keep]

    attributes[attributes < attr_score_thresh] = 0
    attr_scores_sorted, attr_indices = torch.sort(attributes, 1, descending=True)
    # 294 appears to act as the background / "no attribute" index in this setup
    attr_indices[attr_scores_sorted < attr_score_thresh] = 294
    attributes_inds = attr_indices[:, 0:max_attr_pred]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.attr_scores = attributes
    result.attr_classes = attributes_inds
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
Example #18
def fast_rcnn_inference_single_image(boxes,
                                     scores,
                                     image_shape,
                                     score_thresh,
                                     nms_thresh,
                                     topk_per_image,
                                     class_logits=None,
                                     estimate_uncertainty=False,
                                     variance=torch.Tensor([])):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4
    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.

    # Get box ID with predicted class label: [box id, class label]
    filter_inds = filter_mask.nonzero()

    import numpy as np
    class_id = np.argmax(scores.cpu().numpy(), axis=1)
    # Pair every box with its argmax class (use the actual proposal count
    # rather than a hard-coded 1000)
    class_id = np.array([np.arange(scores.shape[0]), class_id])
    class_id = np.swapaxes(class_id, 1, 0)
    boxes_one_class = boxes[class_id[:, 0], class_id[:, 1], :].cpu().numpy()
    scores_one_class = np.max(scores.cpu().numpy(), axis=1)

    if class_logits is not None:
        class_logits = class_logits[filter_inds[:, 0]]
        predicted_probs = scores[filter_inds[:, 0]]

    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores_filtered = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores_filtered, filter_inds[:, 1], nms_thresh)

    if topk_per_image >= 0:
        keep = keep[:topk_per_image]

    boxes_final, scores_final, filter_inds_final = boxes[
        keep], scores_filtered[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes_final)
    result.scores = scores_final
    result.pred_classes = filter_inds_final[:, 1]
    # Save out the classification logits and probabilities for the kept boxes
    if class_logits is not None:
        result.class_logits = class_logits[keep]
        result.prob_score = predicted_probs[keep]

    if estimate_uncertainty:
        # std over the boxes whose class confidence exceeded the threshold
        # (an alternative is the std over all raw proposals, via
        # boxes_one_class / scores_one_class above)
        stds = nms_calc_uncertainty(boxes_final.cpu().numpy(),
                                    scores_final.cpu().numpy(),
                                    boxes.cpu().numpy(),
                                    scores_filtered.cpu().numpy(), 0.9)
        result.stds = torch.tensor(stds, device=boxes_final.device)

    if len(variance) > 0:
        result.vars = variance[keep]

    return result, filter_inds_final[:, 0]
Example #19
def fast_rcnn_inference_single_image_with_overlap(
    boxes,
    scores,
    overlap_boxes,
    overlap_probs,
    image_shape,
    score_thresh,
    nms_thresh,
    topk_per_image,
    allow_oob=False,
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        overlap_boxes = overlap_boxes[valid_mask]
        overlap_probs = overlap_probs[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    if not allow_oob:
        boxes = Boxes(boxes.reshape(-1, 4))
        boxes.clip(image_shape)
        boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

        assert (overlap_boxes.size(1) == 4
                ), "overlap boxes prediction has no category, but: {}".format(
                    overlap_boxes.size())
        overlap_boxes = Boxes(overlap_boxes)
        overlap_boxes.clip(image_shape)
        overlap_boxes = overlap_boxes.tensor
    else:
        boxes = boxes.view(-1, num_bbox_reg_classes, 4)

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
        overlap_boxes = overlap_boxes[filter_inds[:, 0]]
    else:
        boxes = boxes[filter_mask]
        overlap_boxes = overlap_boxes[filter_inds[:, 0]]
    scores = scores[filter_mask]
    overlap_probs = overlap_probs[filter_mask]

    # Apply per-class NMS
    self_defined_nms_on = True  # set False to fall back to torchvision NMS
    if self_defined_nms_on:
        device = filter_inds.device
        boxes = np.ascontiguousarray(boxes.cpu())
        scores = np.ascontiguousarray(scores.cpu())
        overlap_probs = np.ascontiguousarray(overlap_probs.cpu())
        overlap_boxes = np.ascontiguousarray(overlap_boxes.cpu())

        keep = batched_noh_nms(boxes,
                               scores,
                               overlap_probs,
                               overlap_boxes,
                               Nt=nms_thresh,
                               thresh=0.01,
                               method=3)

        boxes = torch.from_numpy(boxes).to(device)
        scores = torch.from_numpy(scores).to(device)
        overlap_probs = torch.from_numpy(overlap_probs).to(device)
        overlap_boxes = torch.from_numpy(overlap_boxes).to(device)
        keep = keep[scores[keep].argsort(descending=True)]
        keep = keep[scores[keep].argsort(descending=True)]
    else:
        from torchvision.ops import nms

        keep = nms(boxes, scores, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, overlap_boxes, overlap_probs, filter_inds = (
        boxes[keep],
        scores[keep],
        overlap_boxes[keep],
        overlap_probs[keep],
        filter_inds[keep],
    )

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.overlap_boxes = Boxes(overlap_boxes)
    result.overlap_probs = overlap_probs
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
Example #20
def fast_rcnn_inference_single_image(boxes,
                                     scores,
                                     image_shape,
                                     score_thresh,
                                     nms_thresh,
                                     topk_per_image,
                                     vp_bins=None,
                                     vp=None,
                                     vp_res=None,
                                     rotated_box_training=False,
                                     h=None):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    if not rotated_box_training or len(boxes) == 0:
        keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    else:
        # BBox with encoding ctr_x,ctr_y,w,l
        if vp is not None and vp_bins is not None:
            _vp = vp.view(-1, num_bbox_reg_classes, vp_bins)  # R x C x bins
            _vp = _vp[filter_mask]
            if len(_vp) > 0:
                _, vp_max = torch.max(_vp, 1)
                vp_filtered = vp_max
                if vp_res is not None:
                    _vp_res = vp_res.view(-1, num_bbox_reg_classes, vp_bins)
                    _vp_res = _vp_res[filter_mask]
                    vp_res_filtered = list()
                    for i, k in enumerate(vp_max):
                        vp_res_filtered.append(_vp_res[i, k])
                else:
                    vp_filtered = _vp
            rboxes = []
            for i in range(boxes.shape[0]):
                box = boxes[i]
                if vp_res is not None:
                    angle = anglecorrection(vp_res_filtered[i] * 180 / math.pi).to(box.device)
                else:
                    angle = bin2ang(vp_filtered[i], vp_bins).to(box.device)
                box = torch.cat((box, angle))
                rboxes.append(box)
            rboxes = torch.cat(rboxes).reshape(-1, 5).to(vp_filtered.device)
            #keep = nms_rotated(rboxes, scores, nms_thresh)
            keep = batched_nms_rotated(rboxes, scores, filter_inds[:, 1],
                                       nms_thresh)
        else:
            # boxes is 2-D ([R', 4]) after the score filtering above
            boxes[:, 2] = boxes[:, 2] + boxes[:, 0]  # x2
            boxes[:, 3] = boxes[:, 3] + boxes[:, 1]  # y2
            keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)

    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    if vp is not None and vp_bins is not None:
        vp = vp.view(-1, num_bbox_reg_classes, vp_bins)  # R x C x bins
        vp = vp[filter_mask]
        vp = vp[keep]
        if vp_res is not None:
            vp_res = vp_res.view(-1, num_bbox_reg_classes, vp_bins)
            vp_res = vp_res[filter_mask]
            vp_res = vp_res[keep]
        if len(vp) > 0:
            _, vp_max = torch.max(vp, 1)
            result.viewpoint = vp_max
            if vp_res is not None:
                vp_res_filtered = list()
                for i, k in enumerate(vp_max):
                    vp_res_filtered.append(vp_res[i, k])
                # This result is directly the yaw orientation predicted
                result.viewpoint_residual = torch.tensor(vp_res_filtered).to(
                    vp_max.device)
        else:
            result.viewpoint = vp
            result.viewpoint_residual = vp_res
    if h is not None:
        h = h.view(-1, num_bbox_reg_classes, 2)  # R x C x bins
        h = h[filter_mask]
        h = h[keep]
        result.height = h
    return result, filter_inds[:, 0]
Example #21
def fast_rcnn_inference_single_image(boxes,
                                     scores,
                                     image_shape,
                                     score_thresh,
                                     nms_thresh,
                                     topk_per_image,
                                     device,
                                     preferred_labels=[],
                                     tokens=[],
                                     tokenizer=None):
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Select max scores
    max_scores, max_classes = scores.max(1)  # R x C --> R

    num_objs = boxes.size(0)
    boxes = boxes.view(-1, 4)
    idxs = torch.arange(num_objs, device=device) * num_bbox_reg_classes + max_classes
    max_boxes = boxes[idxs]  # Select max boxes according to the max scores.

    # Apply NMS
    keep = nms(max_boxes, max_scores, nms_thresh)
    # calculate the closest tokens
    words = get_image_labels2(preferred_labels, max_classes[keep].tolist(),
                              keep.tolist())
    relevant = []
    others = []
    class_list = []

    for word, c, i in words:
        tok = tokenizer.vocab.get(word, tokenizer.vocab["[UNK]"])
        # Relevant tokens (those mentioned in the text) go first
        if tok in tokens:
            relevant.append(i)
        # Repeated class predictions go last
        elif c in class_list:
            class_list.append(c)
            others.append(i)
        # New (varied) class predictions go to the front of the remainder
        else:
            class_list.append(c)
            others.insert(0, i)

    keep = torch.tensor(relevant + others, device=device)

    # TODO: remove duplicate classes

    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
        # Shuffle the kept detections so downstream consumers see no ordering
        keep = keep[torch.randperm(keep.size(0))]
    boxes, scores = max_boxes[keep], max_scores[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = max_classes[keep]

    return result, keep
Example #22
def fsod_fast_rcnn_inference_single_image(pred_cls, boxes, scores, image_shape,
                                          score_thresh, nms_thresh,
                                          topk_per_image):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fsod_fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fsod_fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)

    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        pred_cls = pred_cls[valid_mask]

    scores = scores[:, :-1]

    cls_num = pred_cls.unique().shape[0]
    box_num = scores.shape[0] // cls_num

    # Regroup the class-major layout into per-box rows:
    # (cls_num * box_num, ...) -> (box_num, cls_num, ...)
    scores = scores.reshape(cls_num, box_num).permute(1, 0)
    boxes = boxes.reshape(cls_num, box_num, 4).permute(1, 0, 2).reshape(box_num, -1)
    pred_cls = pred_cls.reshape(cls_num, box_num).permute(1, 0)

    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]
    pred_cls = pred_cls[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds, pred_cls = (
        boxes[keep], scores[keep], filter_inds[keep], pred_cls[keep])

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = pred_cls

    return result, filter_inds[:, 0]
Example #23
def fast_rcnn_inference_single_image(boxes,
                                     scores,
                                     image_shape,
                                     objness_scores,
                                     score_thresh,
                                     nms_thresh,
                                     topk_per_image,
                                     use_unknown=False,
                                     num_classes=80,
                                     reverse_label_converter=None):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)

    if reverse_label_converter is not None:
        ignore_void = reverse_label_converter[-1] == -1
    else:
        ignore_void = scores.shape[1] == num_classes + 1

    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        objness_scores = objness_scores[valid_mask]

    original_scores = scores.clone()
    if ignore_void:
        scores = scores[:, :-1]
    else:
        scores = scores[:, :-2]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    if scores.shape[1] > num_classes:
        filter_mask = scores[:, :-1] > score_thresh
    else:
        filter_mask = scores > score_thresh  # R x K
    if use_unknown:
        new_filter_mask = filter_mask.sum(-1) < 1
        if original_scores.shape[1] > num_classes + 1 or not ignore_void:
            new_filter_mask = torch.logical_and(
                new_filter_mask,
                original_scores.argmax(-1) == num_classes)
        objness_scores = objness_scores.sigmoid()
        obj_th = 0.5
        unknown_filter_mask = torch.logical_and(new_filter_mask,
                                                objness_scores > obj_th)
        unknown_filter_inds = unknown_filter_mask.nonzero()
        unknown_boxes = boxes[unknown_filter_inds[:, 0], 0]
        unknown_scores = objness_scores[unknown_filter_inds[:, 0]]
        keep = nms(unknown_boxes, unknown_scores, nms_thresh)
        # Reserve at most half of the detection budget for unknown objects
        keep = keep[:int(topk_per_image * 0.5)]
        unknown_boxes = unknown_boxes[keep]
        unknown_scores = unknown_scores[keep]
        unknown_filter_inds = unknown_filter_inds[keep]

    if scores.shape[1] > num_classes:
        scores = scores[:, :-1]

    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    if use_unknown:
        boxes = torch.cat((boxes, unknown_boxes), dim=0)
        scores = torch.cat((scores, unknown_scores), dim=0)
        # Unknowns get label -1 (or -2 when a separate void class exists)
        unknown_label = -1 if ignore_void else -2
        unknown_classes = unknown_label * torch.ones(
            len(unknown_scores), device=filter_inds.device).long()
        classes = torch.cat((filter_inds[:, 1], unknown_classes), dim=0)
    else:
        classes = filter_inds[:, -1]
    if reverse_label_converter is not None:
        # Sentinel ids (-1 / -2) index from the end of the converter tensor.
        classes = reverse_label_converter.to(classes.device)[classes]

    if topk_per_image >= 0:
        boxes = boxes[:topk_per_image]
        scores = scores[:topk_per_image]
        classes = classes[:topk_per_image]

    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = classes
    inds = filter_inds[:, 0]
    if use_unknown:
        inds = torch.cat((inds, unknown_filter_inds[:, 0]))
    if topk_per_image >= 0:
        inds = inds[:topk_per_image]
    return result, inds
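A minimal, self-contained sketch of the unknown-candidate selection above, on toy tensors (the shapes, thresholds, and seed are illustrative assumptions, not values from the source):

import torch

torch.manual_seed(0)
num_classes = 3
scores = torch.rand(5, num_classes)               # per-class scores, R x K
original_scores = torch.rand(5, num_classes + 1)  # with trailing void column
objness_logits = torch.randn(5)
score_thresh, obj_th = 0.9, 0.5

filter_mask = scores > score_thresh
# Unknown candidates: clear no class threshold ...
unknown_mask = filter_mask.sum(-1) < 1
# ... have the void column as their argmax ...
unknown_mask &= original_scores.argmax(-1) == num_classes
# ... and carry high objectness.
unknown_mask &= objness_logits.sigmoid() > obj_th
print(unknown_mask.nonzero().flatten())           # candidate row indices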
Beispiel #24
def fast_rcnn_inference_single_image_recon_recls(boxes,
                                                 scores,
                                                 image_shape,
                                                 score_thresh,
                                                 nms_thresh,
                                                 topk_per_image,
                                                 features,
                                                 mask_pooler,
                                                 mask_head,
                                                 recon_net=None,
                                                 alpha=2,
                                                 recls=None):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4
    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]

    scores = scores[filter_mask]

    # apply recon net
    mask_features = mask_pooler(features, [Boxes(boxes)])
    if mask_head.cfg.MODEL.ROI_HEADS.NAME == "StandardROIHeads":
        pred_mask_logits = mask_head(mask_features)
    else:
        results = Instances(image_shape)
        results.pred_classes = filter_inds[:, 1]

        pred_mask_logits, _ = mask_head(mask_features, [results])

    n = 1  # count of multiplicative score factors (for the geometric mean below)
    if recls and pred_mask_logits[0][1].size(0) != 0:
        if recls.rescoring:
            pred_visible_mask_logits = pred_mask_logits[1][1] if len(
                pred_mask_logits) > 1 else pred_mask_logits[0][1]
            pred_visible_mask_logits = get_pred_masks_logits_by_cls(
                pred_visible_mask_logits, filter_inds[:, 1])
            if recls.attention_mode == "mask":
                recls_logits = recls(mask_features * F.avg_pool2d(
                    (pred_visible_mask_logits > 0).float(), 2))
            else:
                recls_logits = recls(mask_features *
                                     F.avg_pool2d(pred_visible_mask_logits, 2))

            recls_prob = torch.softmax(recls_logits, dim=1)

            indices = torch.arange(recls_prob.size(0),
                                   device=recls_prob.device)
            # Blend the reclassification probability for the predicted class
            # into the detection score (0.4 / 0.6 weighting).
            scores = scores * (recls_prob[indices, filter_inds[:, 1]] * 0.4 +
                               0.6)
            n += 1

    if recon_net and pred_mask_logits[0][0].size(0):
        if recon_net.rescoring:
            mode = "normal"

            select = 1 if len(pred_mask_logits) == 2 else 0
            indices = torch.arange(pred_mask_logits[select][0].size(0),
                                   device=pred_mask_logits[select][0].device)
            pred_masks = (pred_mask_logits[select][0][indices,
                                                      filter_inds[:, 1]] >
                          0).unsqueeze(1).float()
            similiarity, recon_logits = get_similarity(pred_masks,
                                                       recon_net,
                                                       filter_inds,
                                                       post_process=mode)

            # Rescore with the reconstruction similarity: the factor
            # relu(log(alpha - similarity) / log(alpha)) is 1 at similarity 0
            # and decays to 0 as similarity approaches alpha - 1.
            alpha_t = torch.as_tensor(float(alpha), device=similarity.device)
            scores = scores * torch.relu(
                torch.log(alpha_t - similarity) / torch.log(alpha_t))
            n += 1

    # Geometric mean over the n multiplicative score factors applied above.
    scores = scores**(1 / n)
    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]
    results = Instances(image_shape)
    results.pred_boxes = Boxes(boxes)
    results.scores = scores
    results.pred_classes = filter_inds[:, 1]

    return results, filter_inds[:, 0]
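The two rescoring branches above each multiply one factor into scores and increment n, and the final scores ** (1 / n) takes a geometric mean so that enabling more branches does not systematically shrink scores. A toy illustration with made-up numbers:

import torch

scores = torch.tensor([0.9, 0.6])
# Reclassification blend, as in the branch above (0.4 / 0.6 weighting).
recls_prob = torch.tensor([0.95, 0.70])
recls_factor = recls_prob * 0.4 + 0.6
# Reconstruction-similarity factor with alpha = 2.
alpha = torch.tensor(2.0)
similarity = torch.tensor([0.2, 0.8])
recon_factor = torch.relu(torch.log(alpha - similarity) / torch.log(alpha))

n = 3  # original score plus two extra factors
rescored = (scores * recls_factor * recon_factor) ** (1 / n)
print(rescored)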
Beispiel #25
def fast_rcnn_inference_single_image(
    boxes,
    scores,
    objectness_scores,  # OWT
    features,  # OWT
    image_shape: Tuple[int, int],
    score_thresh: float,
    nms_thresh: float,
    topk_per_image: int,
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        # Keep the per-row OWT tensors aligned with the filtered rows.
        objectness_scores = objectness_scores[valid_mask]
        features = features[valid_mask]

    bg_scores = scores[:, -1]  # OWT
    scores = scores[:, :-1]
    num_classes = scores.shape[1]  # OWT
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # 1. Filter results based on detection scores. It can make NMS more efficient
    #    by filtering out low-confidence detections.
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # 2. Apply NMS for each class independently.
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]
    # OWT: find out which rows of the original (R x (K+1)) scores tensor were
    # kept, and gather the corresponding per-row tensors. filter_inds has
    # already been indexed by `keep`, so its first column is the row index;
    # this stays correct even when score_thresh drops entries, unlike
    # torch.div(keep, num_classes, rounding_mode='trunc').
    keep_row = filter_inds[:, 0]
    bg_scores = bg_scores[keep_row]
    objectness_scores = objectness_scores[keep_row]
    features = features[keep_row]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.bg_scores = bg_scores  # OWT
    result.objectness = objectness_scores  # OWT
    result.embeddings = features  # OWT
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
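Why filter_inds[:, 0] recovers the original row after NMS, whereas dividing keep by the class count only matches when score_thresh filters nothing out; a sketch with purely illustrative toy values:

import torch

K = 3
scores = torch.tensor([[0.1, 0.9, 0.2],
                       [0.8, 0.1, 0.1],
                       [0.1, 0.1, 0.1],   # fails every threshold
                       [0.7, 0.2, 0.9]])
filter_mask = scores > 0.5
filter_inds = filter_mask.nonzero()       # entries (0,1), (1,0), (3,0), (3,2)
keep = torch.tensor([0, 2])               # pretend NMS kept entries 0 and 2
print(filter_inds[keep][:, 0])                     # tensor([0, 3]): correct rows
print(torch.div(keep, K, rounding_mode='trunc'))   # tensor([0, 0]): wrong here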