Example #1
def get_location_info(human_boxes, object_boxes, union_boxes):
    assert human_boxes.shape[1] == object_boxes.shape[1] == union_boxes.shape[
        1] == 4
    human_object_loc = box_utils.bbox_transform_inv(human_boxes, object_boxes)
    human_union_loc = box_utils.bbox_transform_inv(human_boxes, union_boxes)
    object_union_loc = box_utils.bbox_transform_inv(object_boxes, union_boxes)
    return np.concatenate(
        (human_object_loc, human_union_loc, object_union_loc), axis=1)
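A minimal usage sketch for the helper above (not part of the original example; it assumes Detectron's box_utils is importable as in the snippet): each input is an (n, 4) array of matched boxes, and the result is the (n, 12) concatenation of three sets of box-regression deltas.

import numpy as np

n = 3
xy = np.random.rand(n, 2).astype(np.float32) * 50
human_boxes = np.hstack((xy, xy + 20))   # valid (x1, y1, x2, y2) boxes
object_boxes = human_boxes + 5
union_boxes = np.hstack((np.minimum(human_boxes[:, :2], object_boxes[:, :2]),
                         np.maximum(human_boxes[:, 2:], object_boxes[:, 2:])))

loc_feat = get_location_info(human_boxes, object_boxes, union_boxes)
assert loc_feat.shape == (n, 12)  # 3 pairwise transforms x 4 deltas each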
 def test_bbox_dataset_to_prediction_roundtrip(self):
     """Simulate the process of reading a ground-truth box from a dataset,
     make predictions from proposals, convert the predictions back to the
     dataset format, and then use the COCO API to compute IoU overlap between
     the gt box and the predictions. These should have IoU of 1.
     """
     weights = (5, 5, 10, 10)
     # 1/ "read" a box from a dataset in the default (x1, y1, w, h) format
     gt_xywh_box = [10, 20, 100, 150]
     # 2/ convert it to our internal (x1, y1, x2, y2) format
     gt_xyxy_box = box_utils.xywh_to_xyxy(gt_xywh_box)
     # 3/ consider nearby proposal boxes
     prop_xyxy_boxes = random_boxes(gt_xyxy_box, 10, 10)
     # 4/ compute proposal-to-gt transformation deltas
     deltas = box_utils.bbox_transform_inv(
         prop_xyxy_boxes, np.array([gt_xyxy_box]), weights=weights
     )
     # 5/ use deltas to transform proposals to xyxy predicted box
     pred_xyxy_boxes = box_utils.bbox_transform(
         prop_xyxy_boxes, deltas, weights=weights
     )
     # 6/ convert xyxy predicted box to xywh predicted box
     pred_xywh_boxes = box_utils.xyxy_to_xywh(pred_xyxy_boxes)
     # 7/ use COCO API to compute IoU
     not_crowd = [int(False)] * pred_xywh_boxes.shape[0]
     ious = COCOmask.iou(pred_xywh_boxes, np.array([gt_xywh_box]), not_crowd)
     np.testing.assert_array_almost_equal(ious, np.ones(ious.shape))
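The test above relies on a random_boxes helper that is not shown on this page. A plausible minimal implementation (an assumption, not necessarily the original Detectron test helper) jitters the reference box with uniform noise of the given scale:

import numpy as np

def random_boxes(mean_box, stdev, N):
    # Produce N boxes near mean_box (x1, y1, x2, y2) by adding
    # uniform noise of magnitude `stdev` to each coordinate.
    boxes = np.random.rand(N, 4) * stdev + np.array(mean_box)
    return boxes.astype(dtype=np.float32)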
Example #3
def _compute_targets(entry):
    """Compute bounding-box regression targets for an image."""
    # Indices of ground-truth ROIs
    rois = entry['boxes']
    overlaps = entry['max_overlaps']
    labels = entry['max_classes']
    gt_inds = np.where((entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
    # Targets has format (class, tx, ty, tw, th)
    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    if len(gt_inds) == 0:
        # Bail if the image has no ground-truth ROIs
        return targets

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = box_utils.bbox_overlaps(
        rois[ex_inds, :].astype(dtype=np.float32, copy=False),
        rois[gt_inds, :].astype(dtype=np.float32, copy=False))

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]
    # Use class "1" for all boxes if using class_agnostic_bbox_reg
    targets[ex_inds, 0] = (
        1 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else labels[ex_inds])
    targets[ex_inds, 1:] = box_utils.bbox_transform_inv(
        ex_rois, gt_rois, cfg.MODEL.BBOX_REG_WEIGHTS)
    return targets
def _compute_targets(entry):
    """Compute bounding-box regression targets for an image."""
    # Indices of ground-truth ROIs
    rois = entry['boxes']
    overlaps = entry['max_overlaps']
    labels = entry['max_classes']
    gt_inds = np.where((entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
    # Targets has format (class, tx, ty, tw, th)
    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    if len(gt_inds) == 0:
        # Bail if the image has no ground-truth ROIs
        return targets

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = box_utils.bbox_overlaps(
        rois[ex_inds, :].astype(dtype=np.float32, copy=False),
        rois[gt_inds, :].astype(dtype=np.float32, copy=False))

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]
    # Use class "1" for all boxes if using class_agnostic_bbox_reg
    targets[ex_inds,
            0] = (1 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else labels[ex_inds])
    targets[ex_inds,
            1:] = box_utils.bbox_transform_inv(ex_rois, gt_rois,
                                               cfg.MODEL.BBOX_REG_WEIGHTS)
    return targets
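For reference, the deltas written into targets[:, 1:] by box_utils.bbox_transform_inv follow the standard R-CNN box parameterization. A self-contained NumPy sketch of that computation (illustrative only, assuming Detectron's +1 width/height convention; not the library code itself):

import numpy as np

def bbox_deltas(ex_rois, gt_rois, weights=(1.0, 1.0, 1.0, 1.0)):
    # Center translation scaled by the example box size, plus
    # log-space width/height ratios, each scaled by its weight.
    ex_w = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_h = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_cx = ex_rois[:, 0] + 0.5 * ex_w
    ex_cy = ex_rois[:, 1] + 0.5 * ex_h

    gt_w = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_h = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_cx = gt_rois[:, 0] + 0.5 * gt_w
    gt_cy = gt_rois[:, 1] + 0.5 * gt_h

    wx, wy, ww, wh = weights
    dx = wx * (gt_cx - ex_cx) / ex_w
    dy = wy * (gt_cy - ex_cy) / ex_h
    dw = ww * np.log(gt_w / ex_w)
    dh = wh * np.log(gt_h / ex_h)
    return np.vstack((dx, dy, dw, dh)).transpose()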
 def test_bbox_dataset_to_prediction_roundtrip(self):
     """Simulate the process of reading a ground-truth box from a dataset,
     make predictions from proposals, convert the predictions back to the
     dataset format, and then use the COCO API to compute IoU overlap between
     the gt box and the predictions. These should have IoU of 1.
     """
     weights = (5, 5, 10, 10)
     # 1/ "read" a box from a dataset in the default (x1, y1, w, h) format
     gt_xywh_box = [10, 20, 100, 150]
     # 2/ convert it to our internal (x1, y1, x2, y2) format
     gt_xyxy_box = box_utils.xywh_to_xyxy(gt_xywh_box)
     # 3/ consider nearby proposal boxes
     prop_xyxy_boxes = random_boxes(gt_xyxy_box, 10, 10)
     # 4/ compute proposal-to-gt transformation deltas
     deltas = box_utils.bbox_transform_inv(prop_xyxy_boxes,
                                           np.array([gt_xyxy_box]),
                                           weights=weights)
     # 5/ use deltas to transform proposals to xyxy predicted box
     pred_xyxy_boxes = box_utils.bbox_transform(prop_xyxy_boxes,
                                                deltas,
                                                weights=weights)
     # 6/ convert xyxy predicted box to xywh predicted box
     pred_xywh_boxes = box_utils.xyxy_to_xywh(pred_xyxy_boxes)
     # 7/ use COCO API to compute IoU
     not_crowd = [int(False)] * pred_xywh_boxes.shape[0]
     ious = COCOmask.iou(pred_xywh_boxes, np.array([gt_xywh_box]),
                         not_crowd)
     np.testing.assert_array_almost_equal(ious, np.ones(ious.shape))
Example #6
def _compute_action_targets(person_rois, gt_boxes, role_ids):
    '''
    Compute action targets.
    :param person_rois: rois assigned to gt acting humans, shape n x 4
    :param gt_boxes: all gt boxes in one image
    :param role_ids: person_rois_num x action_cls_num x NUM_TARGET_OBJECT_TYPES,
                     stores the role object id assigned to each person roi.
    :return: flattened action targets and the corresponding target weights.
    '''
    assert person_rois.shape[0] == role_ids.shape[0]
    # ToDo: should use cfg.MODEL.BBOX_REG_WEIGHTS?
    # calculate targets between every person roi and every gt box
    targets = box_utils.bbox_transform_inv(
        np.repeat(person_rois, gt_boxes.shape[0], axis=0),
        np.tile(gt_boxes, (person_rois.shape[0], 1)),
        (1., 1., 1., 1.)).reshape(person_rois.shape[0], gt_boxes.shape[0], -1)
    # human action targets have shape (person_num, e.g. 16; action_num: 26; role_cls: 2; relative_location: 4)
    human_action_targets = np.zeros(
        (role_ids.shape[0], role_ids.shape[1], role_ids.shape[2], 4),
        dtype=np.float32)
    action_target_weights = np.zeros_like(human_action_targets,
                                          dtype=np.float32)
    # get action targets relative location
    human_action_targets[np.where(role_ids > -1)] = \
        targets[np.where(role_ids > -1)[0], role_ids[np.where(role_ids > -1)].astype(int)]
    action_target_weights[np.where(role_ids > -1)] = 1.

    return (human_action_targets.reshape(
                -1, cfg.VCOCO.NUM_ACTION_CLASSES * cfg.VCOCO.NUM_TARGET_OBJECT_TYPES * 4),
            action_target_weights.reshape(
                -1, cfg.VCOCO.NUM_ACTION_CLASSES * cfg.VCOCO.NUM_TARGET_OBJECT_TYPES * 4))
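The np.repeat / np.tile pairing above enumerates every (person roi, gt box) combination with persons in the outer loop; an illustrative sketch (not part of the original code) with 2 persons and 3 gt boxes:

import numpy as np

person_rois = np.array([[0, 0, 10, 10], [5, 5, 20, 20]], dtype=np.float32)
gt_boxes = np.array([[1, 1, 9, 9], [4, 4, 18, 18], [2, 2, 30, 30]], dtype=np.float32)

paired_persons = np.repeat(person_rois, gt_boxes.shape[0], axis=0)  # p0,p0,p0,p1,p1,p1
paired_gts = np.tile(gt_boxes, (person_rois.shape[0], 1))           # g0,g1,g2,g0,g1,g2
print(paired_persons.shape, paired_gts.shape)  # (6, 4) (6, 4)
# After bbox_transform_inv, the (6, 4) deltas reshape to (2, 3, 4),
# i.e. targets[i, j] holds the deltas from person i to gt box j.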
Example #7
def _compute_targets(ex_rois, gt_rois, labels):
    """Compute bounding-box regression targets for an image."""

    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    targets = box_utils.bbox_transform_inv(ex_rois, gt_rois,
                                           cfg.MODEL.BBOX_REG_WEIGHTS)
    return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32,
                                                              copy=False)
Example #8
def _compute_targets(ex_rois, gt_rois, labels):
    """Compute bounding-box regression targets for an image."""

    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    targets = box_utils.bbox_transform_inv(ex_rois, gt_rois, cfg.MODEL.BBOX_REG_WEIGHTS)
    # Use class "1" for all fg boxes if using class_agnostic_bbox_reg
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        labels.clip(max=1, out=labels)
    return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
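The labels.clip call above collapses every foreground class id to class 1 when class-agnostic regression is enabled; a quick illustration (not from the source):

import numpy as np

labels = np.array([0, 3, 7])
labels.clip(max=1, out=labels)   # in-place, as in the snippet above
print(labels)                    # [0 1 1]: all fg classes become class 1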
 def test_bbox_transform_and_inverse(self):
     weights = (5, 5, 10, 10)
     src_boxes = random_boxes([10, 10, 20, 20], 1, 10)
     dst_boxes = random_boxes([10, 10, 20, 20], 1, 10)
     deltas = box_utils.bbox_transform_inv(
         src_boxes, dst_boxes, weights=weights
     )
     dst_boxes_reconstructed = box_utils.bbox_transform(
         src_boxes, deltas, weights=weights
     )
     np.testing.assert_array_almost_equal(
         dst_boxes, dst_boxes_reconstructed, decimal=5
     )
Example #10
def _compute_targets(ex_rois, gt_rois, labels):
    """Compute bounding-box regression targets for an image."""

    assert ex_rois.shape[0] == gt_rois.shape[0]
    # Following are no longer true with tubes. Also, since bbox_transform_inv
    # can handle tubes, we don't need these assertions
    # assert ex_rois.shape[1] == 4
    # assert gt_rois.shape[1] == 4

    targets = box_utils.bbox_transform_inv(
        ex_rois, gt_rois, cfg.MODEL.BBOX_REG_WEIGHTS)
    return np.hstack(
        (labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
 def test_bbox_transform_and_inverse(self):
     weights = (5, 5, 10, 10)
     src_boxes = random_boxes([10, 10, 20, 20], 1, 10)
     dst_boxes = random_boxes([10, 10, 20, 20], 1, 10)
     deltas = box_utils.bbox_transform_inv(src_boxes,
                                           dst_boxes,
                                           weights=weights)
     dst_boxes_reconstructed = box_utils.bbox_transform(src_boxes,
                                                        deltas,
                                                        weights=weights)
     np.testing.assert_array_almost_equal(dst_boxes,
                                          dst_boxes_reconstructed,
                                          decimal=5)
Example #12
def _compute_targets(ex_rois, gt_rois, labels):
    """Compute bounding-box regression targets for an image."""

    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    targets = box_utils.bbox_transform_inv(
        ex_rois, gt_rois, cfg.MODEL.BBOX_REG_WEIGHTS
    )
    return np.hstack((labels[:, np.newaxis], targets)).astype(
        np.float32, copy=False
    )
Example #13
def _compute_targets(ex_rois, gt_rois, labels):
    """Compute bounding-box regression targets for an image."""

    assert ex_rois.shape[0] == gt_rois.shape[0]
    # Following are no longer true with tubes. Also, since bbox_transform_inv
    # can handle tubes, we don't need these assertions
    # assert ex_rois.shape[1] == 4
    # assert gt_rois.shape[1] == 4

    targets = box_utils.bbox_transform_inv(ex_rois, gt_rois,
                                           cfg.MODEL.BBOX_REG_WEIGHTS)
    return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32,
                                                              copy=False)
Example #14
def _compute_targets(ex_rois, gt_rois, labels):
    """Compute bounding-box regression targets for an image."""

    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    targets = box_utils.bbox_transform_inv(ex_rois, gt_rois,
                                           cfg.MODEL.BBOX_REG_WEIGHTS)
    # Use class "1" for all fg boxes if using class_agnostic_bbox_reg
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        labels.clip(max=1, out=labels)
    return np.hstack((labels[:, np.newaxis], targets)).astype(
        np.float32, copy=False)
Example #15
def get_pair_feature(boxes1, boxes2):
    delta_1 = bbox_transform_inv(boxes1, boxes2)
    delta_2 = bbox_transform_inv(boxes2, boxes1)
    spt_feat = np.hstack((delta_1, delta_2[:, :2]))
    return spt_feat
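A shape-level sketch for the helper above (illustrative; it assumes bbox_transform_inv with its default unit weights is in scope, as in the snippet): for n box pairs it returns an (n, 6) feature, i.e. the 4 forward deltas plus the 2 center-translation components of the reverse deltas.

import numpy as np

boxes1 = np.array([[0., 0., 10., 10.], [5., 5., 25., 25.]], dtype=np.float32)
boxes2 = np.array([[2., 2., 12., 12.], [0., 0., 30., 30.]], dtype=np.float32)

spt_feat = get_pair_feature(boxes1, boxes2)
# 4 deltas boxes1 -> boxes2, then dx, dy of boxes2 -> boxes1
assert spt_feat.shape == (2, 6)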
Example #16
def generate_triplets(rois, rois_human_inds, rois_object_inds, rois_to_gt_ind,
                      gt_role_id, batch_idx):
    """
    :param rois:
    :param rois_human_inds: human ind to rois index
    :param rois_object_inds:
    :param rois_to_gt_ind: rois index to gt box index
    :param gt_role_id:
    :param batch_idx:
    :return:
    """
    # ToDo: cfg
    # ipdb.set_trace()
    triplets_num_per_image = cfg.VCOCO.TRIPLETS_NUM_PER_IM
    fg_triplets_num_per_image = int(triplets_num_per_image *
                                    cfg.VCOCO.FG_TRIPLETS_FRACTION)

    # label matrix
    gt_action_mat = generate_action_mat(gt_role_id)  #N x N x 26 x 2

    # generate combinations
    human_rois_inds, object_rois_inds = np.meshgrid(
        np.arange(rois_human_inds.size),
        np.arange(rois_object_inds.size),
        indexing='ij')
    human_rois_inds = human_rois_inds.reshape(-1)
    object_rois_inds = object_rois_inds.reshape(-1)
    # triplet labels
    action_labels = gt_action_mat[
        rois_to_gt_ind[rois_human_inds[human_rois_inds]],
        rois_to_gt_ind[rois_object_inds[object_rois_inds]]]  # (hN' x oN') x 26 x 2
    interaction_action_mask = np.array(cfg.VCOCO.ACTION_MASK).T
    # convert to 24-class
    # action_labels: (hN' x oN') x 24
    # interaction_affinity: (hN' x oN') x 1
    # init_part_attens: (hN' x oN') x 7 x 17 (last dimension is the holistic atten which is all 1)
    action_labels = action_labels[:,
                                  np.where(interaction_action_mask > 0)[0],
                                  np.where(interaction_action_mask > 0)[1]]
    interaction_affinity = np.any(
        action_labels.reshape(action_labels.shape[0], -1) > 0, 1)

    # info for training
    union_boxes = box_utils.get_union_box(
        rois[rois_human_inds[human_rois_inds]][:, 1:],
        rois[rois_object_inds[object_rois_inds]][:, 1:])
    union_boxes = np.concatenate(
        (batch_idx * np.ones((union_boxes.shape[0], 1), dtype=union_boxes.dtype),
         union_boxes),
        axis=1)
    relative_location = box_utils.bbox_transform_inv(
        rois[rois_human_inds[human_rois_inds]][:, 1:],
        rois[rois_object_inds[object_rois_inds]][:, 1:])

    # sample fg/bg triplets
    fg_triplets_inds = np.where(np.sum(action_labels, axis=1) > 0)[0]
    bg_triplets_inds = np.setdiff1d(np.arange(action_labels.shape[0]),
                                    fg_triplets_inds)

    fg_triplets_num_this_image = min(int(triplets_num_per_image * 1 / 4.),
                                     fg_triplets_inds.size)
    if fg_triplets_inds.size > 0:
        fg_triplets_inds = npr.choice(fg_triplets_inds,
                                      size=fg_triplets_num_this_image,
                                      replace=False)

    bg_triplets_num_this_image = max(fg_triplets_num_this_image * 3, 1)
    bg_triplets_num_this_image = min(bg_triplets_num_this_image,
                                     bg_triplets_inds.size)

    if bg_triplets_inds.size > 0 and bg_triplets_num_this_image > 0:
        bg_triplets_inds = npr.choice(bg_triplets_inds,
                                      size=bg_triplets_num_this_image,
                                      replace=False)

        keep_triplets_inds = np.concatenate(
            (fg_triplets_inds, bg_triplets_inds))
    else:
        keep_triplets_inds = fg_triplets_inds

    return_dict = dict(
        human_inds=human_rois_inds[keep_triplets_inds],
        object_inds=object_rois_inds[keep_triplets_inds],
        union_boxes=union_boxes[keep_triplets_inds],
        action_labels=action_labels[keep_triplets_inds],
        spatial_info=relative_location[keep_triplets_inds],
        interaction_affinity=interaction_affinity[keep_triplets_inds],
    )

    return return_dict
Example #17
def compute_targets(ex_rois, gt_rois, weights=(1.0, 1.0, 1.0, 1.0)):
    """Compute bounding-box regression targets for an image."""
    return box_utils.bbox_transform_inv(ex_rois, gt_rois,
                                        weights).astype(np.float32, copy=False)
Example #18
    def forward(self, probs, anchor_deltas, img_info):
        """
        Args:
            probs (Tensor): Classification probability of the anchors.
            anchor_deltas (Tensor): Anchor regression deltas.
            img_info (Tensor[3]): (height, width, scale)

        Returns:
            proposals (Tensor[N, 5]): Predicted region proposals in (0, x1, y1, x2, y2) format.
                                      0 means these proposals are from the first image in the batch.
        """
        # Algorithm:
        #
        # For each (H, W) location i:
        #     Generate A anchors centered on cell i
        #     Apply predicted anchor regression deltas at cell i to each of the A anchors
        # Clip predicted boxes to image
        # Remove predicted boxes with either height or width < threshold
        # Sort all (proposal, score) pairs by score from highest to lowest
        # Take top pre_nms_topN proposals before NMS
        # Apply NMS with threshold 0.7 to remaining proposals
        # Take after_nms_topN proposals after NMS

        assert probs.size(0) == 1, "Single batch only."

        cfg_key = "TRAIN" if self.training else "TEST"
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        # The first set of num_anchors channels are bg probs
        # The second set are the fg probs, which we want
        probs = probs[:, self.num_anchors:, :, :]

        # 1. Generate proposals from regression deltas and shifted anchors
        height, width = probs.shape[-2:]

        # Enumerate all shifts (NOTE: torch.meshgrid is different from np.meshgrid)
        shift_x = torch.arange(0, width) * self.feat_stride
        shift_y = torch.arange(0, height) * self.feat_stride
        shift_y, shift_x = torch.meshgrid(shift_y, shift_x)
        shift_x, shift_y = shift_x.contiguous(), shift_y.contiguous()
        shifts = torch.stack((shift_x.view(-1), shift_y.view(-1),
                              shift_x.view(-1), shift_y.view(-1)),
                             dim=1)
        shifts = shifts.type_as(probs)

        # Enumerate all shifted anchors:
        # Add A anchors (1, A, 4) to K shifts (K, 1, 4) to get shifted anchors (K, A, 4)
        # Reshape to (K * A, 4) shifted anchors
        A = self.num_anchors
        K = shifts.size(0)
        self.anchors = self.anchors.type_as(probs)
        anchors = self.anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(
            1, 0, 2)
        anchors = anchors.view(K * A, 4)

        # Permute and reshape predicted anchor regression deltas to the same order as the anchors:
        # Anchor deltas will be (1, 4 * A, H, W) format
        # Permute to (1, H, W, 4 * A)
        # Reshape to (1 * H * W * A, 4)
        anchor_deltas = anchor_deltas.permute(0, 2, 3,
                                              1).contiguous().view(-1, 4)

        # Safe-guard for unexpected large dw or dh value.
        # Since our proposals are only human, some background region features will never
        # receive gradients from bbox regression. Thus their predictions may drift away.
        anchor_deltas[:, 2:].clamp_(-10, 10)

        # Same story for the scores:
        # Scores are (1, A, H, W) format
        # Permute to (1, H, W, A)
        # Reshape to (1 * H * W * A, 1)
        probs = probs.permute(0, 2, 3, 1).contiguous().view(-1, 1)

        # Convert anchors into proposals via regression deltas
        proposals = bbox_transform_inv(anchors, anchor_deltas)

        # 2. Clip predicted proposals to image
        proposals = clip_boxes(proposals, img_info[:2])

        # 3. Remove predicted boxes with either height or width < threshold
        # (NOTE: need to scale min_size with the input image scale stored in img_info[2])
        widths = proposals[:, 2] - proposals[:, 0] + 1
        heights = proposals[:, 3] - proposals[:, 1] + 1
        min_size = min_size * img_info[2]
        keep = torch.nonzero(
            (widths >= min_size) & (heights >= min_size))[:, 0]
        proposals = proposals[keep]
        probs = probs[keep]

        # 4. Sort all (proposal, score) pairs by score from highest to lowest
        # 5. Take top pre_nms_topN (e.g. 6000)
        order = probs.view(-1).argsort(descending=True)
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order]
        probs = probs[order]

        # 6. Apply nms (e.g. threshold = 0.7)
        # 7. Take after_nms_topN (e.g. 300)
        # 8. Return the top proposals
        keep = nms(proposals, probs.squeeze(1), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep]
        probs = probs[keep]

        # proposals: [img_id, x1, y1, x2, y2]
        # Our RPN implementation only supports a single input image, so all img_ids are 0.
        proposals = torch.cat(
            (torch.zeros(proposals.size(0), 1).type_as(probs), proposals),
            dim=1)
        return proposals
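The anchor-shift enumeration near the top of the forward pass can be hard to picture; a small standalone sketch (not part of the source) shows the K = H x W shift rows that are later added to the A base anchors, here for a 2 x 3 feature map with stride 16:

import torch

feat_stride = 16
height, width = 2, 3
shift_x = torch.arange(0, width) * feat_stride
shift_y = torch.arange(0, height) * feat_stride
shift_y, shift_x = torch.meshgrid(shift_y, shift_x)
shift_x, shift_y = shift_x.contiguous(), shift_y.contiguous()
shifts = torch.stack((shift_x.view(-1), shift_y.view(-1),
                      shift_x.view(-1), shift_y.view(-1)), dim=1)
print(shifts)
# tensor([[ 0,  0,  0,  0],
#         [16,  0, 16,  0],
#         [32,  0, 32,  0],
#         [ 0, 16,  0, 16],
#         [16, 16, 16, 16],
#         [32, 16, 32, 16]])
# Each row offsets an (x1, y1, x2, y2) base anchor to one feature-map cell.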
Example #19
def compute_targets(ex_rois, gt_rois, weights=(1.0, 1.0, 1.0, 1.0)):
    """Compute bounding-box regression targets for an image."""
    return box_utils.bbox_transform_inv(ex_rois, gt_rois, weights).astype(
        np.float32, copy=False
    )