Example #1
    def encode(label_boxes_3d, p2):
        """
            projection points of 3d bbox center and its corners_3d in local
            coordinates frame

        Returns:
            depth of center:
            center 3d location:
            local_corners:
        """
        location = label_boxes_3d[:, :3]
        # depth of the box center (z in the camera frame)
        center_depth = location[:, -1:]
        # projection of the 3d center onto the image plane
        center_2d = geometry_utils.torch_points_3d_to_points_2d(location, p2)
        ry = label_boxes_3d[:, -1:]
        dims = label_boxes_3d[:, 3:6]

        return torch.cat([dims, ry, center_2d, center_depth, location], dim=-1)
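The projection used for center_2d above follows the usual pinhole model. A minimal, self-contained sketch of what torch_points_3d_to_points_2d is assumed to do (project camera-frame points with the (3, 4) p2 matrix, then divide by depth):

import torch

def project_points_3d_to_2d(points_3d, p2):
    """Sketch: points_3d is (N, 3) in the camera frame, p2 is (3, 4)."""
    ones = torch.ones_like(points_3d[:, :1])
    points_h = torch.cat([points_3d, ones], dim=-1)  # homogeneous (N, 4)
    proj = points_h @ p2.t()                         # (N, 3)
    return proj[:, :2] / proj[:, 2:3]                # perspective divide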
Example #2
    def encode(label_boxes_3d, label_boxes_2d, p2, image_info):
        """
        Return the projections of the 3d bbox corners that lie inside the
        2d bbox, and set each corner's visibility according to the 2d bbox
        and the image boundary (truncated or occluded).
        """

        # shape(N, 8, 3)
        corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(
            label_boxes_3d)
        corners_2d = geometry_utils.torch_points_3d_to_points_2d(
            corners_3d.reshape((-1, 3)), p2).reshape(-1, 8, 2)
        corners_2d = NearestV2CornerCoder.reorder_boxes_4c(corners_2d)

        # full-image window as (x1, y1, x2, y2) = (0, 0, w, h)
        image_shape = torch.tensor([0, 0, image_info[1], image_info[0]])
        image_shape = image_shape.type_as(corners_2d).view(1, 4)
        image_filter = geometry_utils.torch_window_filter(corners_2d,
                                                          image_shape,
                                                          deltas=200)

        boxes_2d_filter = geometry_utils.torch_window_filter(
            corners_2d, label_boxes_2d)

        # self-occlusion filter (corners hidden by the box itself)
        self_occluded_filter = Corner2DCoder.get_occluded_filter(corners_3d)

        # points outside the image must be filtered out
        visibility = image_filter.float() * self_occluded_filter


        # normalize using label bbox 2d
        label_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(
            label_boxes_2d.unsqueeze(0)).squeeze(0)
        wh = label_boxes_2d_xywh[:, 2:].unsqueeze(1)
        left_top = label_boxes_2d[:, :2].unsqueeze(1)
        encoded_corners_2d = (corners_2d - left_top) / wh

        encoded_corners_2d = torch.cat(
            [encoded_corners_2d,
             visibility.unsqueeze(-1).float()], dim=-1)
        return encoded_corners_2d.contiguous().view(
            encoded_corners_2d.shape[0], -1)
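torch_window_filter above flags projected corners that fall inside a window, padded by a deltas margin. A hedged sketch of the assumed behavior:

import torch

def window_filter(points_2d, window, deltas=0):
    """Sketch: points_2d is (N, 8, 2); window is (1, 4) or (N, 4) boxes in
    (x1, y1, x2, y2) form; returns an (N, 8) bool mask of points inside
    the window padded by deltas pixels."""
    x, y = points_2d[..., 0], points_2d[..., 1]
    x1, y1 = window[:, 0:1], window[:, 1:2]
    x2, y2 = window[:, 2:3], window[:, 3:4]
    return ((x >= x1 - deltas) & (x <= x2 + deltas) &
            (y >= y1 - deltas) & (y <= y2 + deltas))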
Example #3
    def encode(label_boxes_3d, label_boxes_2d, p2, image_info):
        corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(
            label_boxes_3d)
        corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
            label_boxes_3d, p2)

        # encode depth first
        center_depth = label_boxes_3d[:, 2]
        # encoded_depth = corners_3d[..., -1] - center_depth.unsqueeze(-1)
        # encoded_depth = 1/F.sigmoid(corners_3d[..., -1]) - 1
        encoded_depth = corners_3d[..., -1]
        corners_2d = torch.cat(
            [corners_2d, encoded_depth.unsqueeze(-1)], dim=-1)
        front_plane = corners_2d[:, Order.planes()[0]]
        rear_plane = corners_2d[:, Order.planes()[1]]
        encoded_front_plane, reorder_front_plane = Corner2DNearestCoder.encode_with_bbox(
            front_plane, label_boxes_2d)
        encoded_rear_plane, reorder_rear_plane = Corner2DNearestCoder.encode_with_bbox(
            rear_plane, label_boxes_2d)

        encoded_points = torch.cat([encoded_front_plane, encoded_rear_plane],
                                   dim=1)
        image_shape = torch.tensor([0, 0, image_info[1], image_info[0]])
        image_shape = image_shape.type_as(corners_2d).view(1, 4)
        # reorder the corners so visibility stays aligned with the encoded points
        reorder_corners_2d = torch.cat(
            [reorder_front_plane, reorder_rear_plane], dim=1)
        # remove depth channels
        image_filter = geometry_utils.torch_window_filter(
            reorder_corners_2d[:, :, :-1], image_shape, deltas=200)
        visibility = image_filter
        encoded_all = torch.cat(
            [encoded_points, visibility.unsqueeze(-1).float()], dim=-1)

        encoded_all = encoded_all.view(encoded_all.shape[0], -1)
        # finally append the center depth (a 1 / sigmoid(d) - 1 encoding was
        # also tried; see the sketch below)
        return torch.cat([encoded_all, center_depth.unsqueeze(-1)], dim=-1)
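The 1/F.sigmoid(d) - 1 variant mentioned above is worth a note: algebraically it equals exp(-d), so it compresses large depths toward zero and is trivially invertible. A small sketch of that encode/decode pair (an assumption about the intent, not the shipped coder):

import torch

def encode_depth(depth):
    # 1 / sigmoid(d) - 1 simplifies to exp(-d)
    return 1.0 / torch.sigmoid(depth) - 1.0

def decode_depth(encoded):
    return -torch.log(encoded)

# float32 saturates for large depths, so this only suits nearby objects
d = torch.tensor([2.0, 5.0, 8.0])
assert torch.allclose(decode_depth(encode_depth(d)), d, atol=1e-3)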
Example #4
def main():

    # ImageNet per-channel mean and std used by the dataset transform
    normal_mean = np.asarray([0.485, 0.456, 0.406])
    normal_van = np.asarray([0.229, 0.224, 0.225])
    dataset = build_dataset()
    image_dir = '/data/object/training/image_2'
    result_dir = './results/data'
    save_dir = 'results/images'
    calib_dir = '/data/object/training/calib'
    label_dir = None
    calib_file = None
    visualizer = ImageVisualizer(image_dir,
                                 result_dir,
                                 label_dir=label_dir,
                                 calib_dir=calib_dir,
                                 calib_file=calib_file,
                                 online=True,
                                 save_dir=save_dir)
    for sample in dataset:
        label_boxes_3d = sample['gt_boxes_3d']
        label_boxes_2d = sample['gt_boxes']
        label_classes = sample['gt_labels']
        p2 = torch.from_numpy(sample['p2'])
        image_path = sample['img_name']

        # reorder to the (location, dims, ry) layout expected by geometry_utils
        label_boxes_3d = torch.cat([
            label_boxes_3d[:, 3:6], label_boxes_3d[:, :3],
            label_boxes_3d[:, 6:]
        ], dim=-1)
        corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(
            label_boxes_3d)
        # undo the input normalization for display
        image = sample['img'].permute(1, 2, 0).cpu().detach().numpy()
        image = image.copy()
        image = image * normal_van + normal_mean
        corners_3d = corners_3d.cpu().detach().numpy()
        visualizer.render_image_corners_2d(image_path,
                                           image,
                                           corners_3d=corners_3d,
                                           p2=p2)
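For reference, the display step above just inverts the standard (img - mean) / std input transform; normal_van holds the per-channel std despite its name. A minimal sketch:

import numpy as np

def denormalize(image_chw, mean, std):
    """Sketch: undo (img - mean) / std for a (C, H, W) float image and
    return an (H, W, C) array suitable for rendering."""
    image = np.transpose(image_chw, (1, 2, 0))
    return image * std + mean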
Example #5
    def encode(label_boxes_3d, label_boxes_2d, p2):
        """
        Encode the projections of the 3d bbox center and its corners_3d in a
        local coordinate frame aligned with the camera ray through the center.
        """
        # global to local
        global_corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(
            label_boxes_3d)
        C = label_boxes_3d[:, :3]

        # proj of 3d bbox center
        C_2d = geometry_utils.torch_points_3d_to_points_2d(C, p2)

        alpha = geometry_utils.compute_ray_angle(C_2d.unsqueeze(0),
                                                 p2.unsqueeze(0)).squeeze(0)
        R = geometry_utils.torch_ry_to_rotation_matrix(-alpha).type_as(
            global_corners_3d)

        # local coords
        num_boxes = global_corners_3d.shape[0]
        local_corners_3d = torch.matmul(
            R,
            global_corners_3d.permute(0, 2, 1) - C.unsqueeze(-1)).permute(
                0, 2, 1).contiguous().view(num_boxes, -1)

        # instance depth
        instance_depth = C[:, -1:]

        # finally encode them (local_corners_3d is already encoded);
        # C_2d is encoded relative to the center of the 2d bbox
        # (this function supports batch format only)
        label_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(
            label_boxes_2d.unsqueeze(0)).squeeze(0)
        encoded_C_2d = (
            C_2d - label_boxes_2d_xywh[:, :2]) / label_boxes_2d_xywh[:, 2:]

        # instance_depth is returned as-is (encoding it by its inverse,
        # 1 / instance_depth, was also tried)

        return torch.cat([local_corners_3d, encoded_C_2d, instance_depth],
                         dim=-1)
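compute_ray_angle drives the global-to-local rotation above. A plausible, self-contained sketch of what it computes (the azimuth of the viewing ray through each 2d point; an assumption about the repo's implementation):

import torch

def compute_ray_angle_sketch(points_2d, p2):
    """Sketch: points_2d is (N, 2) pixels, p2 is (3, 4). Back-project each
    pixel to a viewing ray and take its angle in the x-z plane."""
    K = p2[:3, :3]
    ones = torch.ones_like(points_2d[:, :1])
    points_h = torch.cat([points_2d, ones], dim=-1)   # (N, 3)
    rays = points_h @ torch.inverse(K).t()            # (N, 3) ray directions
    return torch.atan2(rays[:, 0], rays[:, 2])        # azimuth about the y-axis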
Example #6
def test_geometry():

    dataset = build_dataset()
    for sample in dataset:

        label_boxes_3d = sample['gt_boxes_3d']
        p2 = torch.from_numpy(sample['p2'])
        # reorder to the (location, dims, ry) layout expected by geometry_utils
        label_boxes_3d = torch.cat([
            label_boxes_3d[:, 3:6], label_boxes_3d[:, :3],
            label_boxes_3d[:, 6:]
        ], dim=-1)

        corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(
            label_boxes_3d)
        # mid-points of the front and rear bottom edges (assumed corner order)
        front_mid = corners_3d[:, [0, 1]].mean(dim=1)
        rear_mid = corners_3d[:, [2, 3]].mean(dim=1)
        points_3d = torch.cat([rear_mid, front_mid], dim=0)
        points_2d = geometry_utils.torch_points_3d_to_points_2d(points_3d, p2)

        lines = points_2d.contiguous().view(2, -1, 2).permute(
            1, 0, 2).contiguous().view(-1, 4)
        ry_pred1 = geometry_utils.torch_pts_2d_to_dir_3d_v2(
            lines.unsqueeze(0), p2.unsqueeze(0))[0]
        ry_gt = label_boxes_3d[:, -1]
        height = label_boxes_3d[:, 1]
        ry_gt[height < 0] = geometry_utils.reverse_angle(ry_gt[height < 0])
        cond = torch.abs(ry_pred1 - ry_gt) < 1e-4
        assert cond.all(), '{} error {} {}'.format(sample['img_name'], ry_gt,
                                                   ry_pred1)
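Every example above leans on torch_boxes_3d_to_corners_3d. A self-contained sketch of one plausible convention ((x, y, z, h, w, l, ry) with the location on the bottom face, KITTI-style; the repo's exact corner order may differ):

import torch

def boxes_3d_to_corners_3d_sketch(boxes_3d):
    """Sketch: boxes_3d is (N, 7); returns (N, 8, 3) corners, bottom face
    first (indices 0-3), then top face (4-7)."""
    loc = boxes_3d[:, :3]
    h, w, l = boxes_3d[:, 3], boxes_3d[:, 4], boxes_3d[:, 5]
    ry = boxes_3d[:, 6]

    zeros, ones = torch.zeros_like(l), torch.ones_like(l)
    x = torch.stack([l / 2, l / 2, -l / 2, -l / 2,
                     l / 2, l / 2, -l / 2, -l / 2], dim=-1)
    y = torch.stack([zeros, zeros, zeros, zeros, -h, -h, -h, -h], dim=-1)
    z = torch.stack([w / 2, -w / 2, -w / 2, w / 2,
                     w / 2, -w / 2, -w / 2, w / 2], dim=-1)
    corners = torch.stack([x, y, z], dim=1)           # (N, 3, 8)

    cos, sin = torch.cos(ry), torch.sin(ry)
    # rotation about the camera y-axis
    R = torch.stack([cos, zeros, sin,
                     zeros, ones, zeros,
                     -sin, zeros, cos], dim=-1).view(-1, 3, 3)
    return (torch.bmm(R, corners) + loc.unsqueeze(-1)).permute(0, 2, 1)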
Example #7
    def encode(label_boxes_3d, proposals, p2):
        """
        Args:
            label_boxes_3d: shape(N, 7)
            proposals: shape(N, 4)
            p2: shape(3, 4)
        """
        # shape(N, 8, 3)
        corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(
            label_boxes_3d)
        corners_2d = geometry_utils.torch_points_3d_to_points_2d(
            corners_3d.reshape((-1, 3)), p2).reshape(-1, 8, 2)
        # shape(N, 3)
        left_side_points_3d = (corners_3d[:, 0] + corners_3d[:, 3]) / 2
        right_side_points_3d = (corners_3d[:, 1] + corners_3d[:, 2]) / 2

        # shape(N, 2, 2)
        left_side = torch.stack([corners_2d[:, 0], corners_2d[:, 3]], dim=1)
        right_side = torch.stack([corners_2d[:, 1], corners_2d[:, 2]], dim=1)

        # shape(N, 2, 2, 2)
        side = torch.stack([left_side, right_side], dim=1)

        # p2 = K[I | T] (no camera rotation), so the camera center is C = -T
        K = p2[:3, :3]
        KT = p2[:, -1]
        T = torch.matmul(torch.inverse(K), KT)
        C = -T
        # shape(N, )
        left_dist = torch.norm(left_side_points_3d - C, dim=-1)
        right_dist = torch.norm(right_side_points_3d - C, dim=-1)
        dist = torch.stack([left_dist, right_dist], dim=-1)
        _, visible_index = torch.min(dist, dim=-1)

        row = torch.arange(visible_index.numel()).type_as(visible_index)
        # either one of the two sides is visible, or neither of them
        visible_side = side[row, visible_index]


        # in the abnormal case, both sides are invisible
        left_slope = geometry_utils.torch_line_to_orientation(left_side[:, 0],
                                                              left_side[:, 1])
        right_slope = geometry_utils.torch_line_to_orientation(
            right_side[:, 0], right_side[:, 1])
        non_visible_cond = left_slope * right_slope < 0

        visible_slope = geometry_utils.torch_line_to_orientation(
            visible_side[:, 0], visible_side[:, 1])
        # cls_orients
        cls_orients = visible_slope > 0
        cls_orients = cls_orients.float()
        cls_orients[non_visible_cond] = 2.0

        # reg_orients
        boxes_3d_proj = geometry_utils.torch_corners_2d_to_boxes_2d(corners_2d)
        # shape(N, 4)
        boxes_3d_proj_xywh = geometry_utils.torch_xyxy_to_xywh(
            boxes_3d_proj.unsqueeze(0)).squeeze(0)
        direction = torch.abs(visible_side[:, 0] - visible_side[:, 1])
        reg_orients = direction / boxes_3d_proj_xywh[:, 2:]

        return torch.cat([cls_orients.unsqueeze(-1), reg_orients], dim=-1)
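The camera-center computation above relies on p2 = K[I | T]: the center C = -T then satisfies p2 @ [C, 1]^T = 0. A quick self-contained sanity check (the intrinsics below are made up):

import torch

p2 = torch.tensor([[700., 0., 600., 45.],
                   [0., 700., 180., 0.],
                   [0., 0., 1., 0.]])
K, Kt = p2[:3, :3], p2[:, -1]
C = -torch.matmul(torch.inverse(K), Kt)
proj = p2 @ torch.cat([C, torch.ones(1)])
assert torch.allclose(proj, torch.zeros(3), atol=1e-4)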
Example #8
def test_bbox_coder():

    bbox_coder = BBox3DCoder({})
    dataset = build_dataset()

    image_dir = '/data/object/training/image_2'
    result_dir = './results/data'
    save_dir = 'results/images'
    calib_dir = '/data/object/training/calib'
    label_dir = None
    calib_file = None
    visualizer = ImageVisualizer(
        image_dir,
        result_dir,
        label_dir=label_dir,
        calib_dir=calib_dir,
        calib_file=calib_file,
        online=True,
        save_dir=save_dir)

    for sample in dataset:
        mean_dims = torch.from_numpy(sample['mean_dims'][None])
        label_boxes_3d = sample['gt_boxes_3d']
        label_boxes_2d = sample['gt_boxes']
        label_classes = sample['gt_labels']
        p2 = torch.from_numpy(sample['p2'])
        bbox_coder.mean_dims = mean_dims

        encoded_corners_2d = bbox_coder.encode_batch_bbox(
            label_boxes_3d, label_boxes_2d, label_classes, p2)


        decoded_corners_2d = bbox_coder.decode_batch_bbox(
            encoded_corners_2d, label_boxes_2d, p2)

        # reassemble (location, dims, ry) from the decoded channels
        boxes_3d = torch.cat([
            decoded_corners_2d[:, 6:9], decoded_corners_2d[:, 3:6],
            decoded_corners_2d[:, -1:]
        ], dim=-1)
        corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(boxes_3d)

        corners_3d = corners_3d.cpu().detach().numpy()

        image_path = sample['img_name']
        image = sample['img'].permute(1, 2, 0).cpu().detach().numpy()
        image = image.copy()
        # undo the input normalization (normal_mean / normal_van are assumed
        # to be module-level constants here)
        image = image * normal_van + normal_mean
        visualizer.render_image_corners_2d(
            image_path, image, corners_3d=corners_3d, p2=p2)
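The visualization above eyeballs the encode/decode round trip. A programmatic check in the same spirit (hedged: it assumes labels and decoded boxes are first brought into the same (location, dims, ry) layout):

import torch

def assert_round_trip(label_boxes_3d, decoded_boxes_3d, atol=1e-3):
    """Sketch: after encode + decode, the recovered boxes should match
    the labels up to numeric error."""
    diff = (label_boxes_3d - decoded_boxes_3d).abs().max()
    assert diff < atol, 'round-trip error: {}'.format(diff.item())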
Example #9
    def encode(label_boxes_3d, proposals, p2, image_info):
        """
        return projections of 3d bbox corners in the inner of 2d bbox.
            Note that set the visibility at the same time according to the 2d bbox
            and image boundary.(truncated or occluded)
        """
        label_boxes_2d = proposals
        # shape(N, 8, 3)
        corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(
            label_boxes_3d)
        corners_2d = geometry_utils.torch_points_3d_to_points_2d(
            corners_3d.reshape((-1, 3)), p2).reshape(-1, 8, 2)

        image_shape = torch.tensor([0, 0, image_info[1], image_info[0]])
        image_shape = image_shape.type_as(corners_2d).view(1, 4)
        image_filter = geometry_utils.torch_window_filter(corners_2d,
                                                          image_shape,
                                                          deltas=200)

        # points outside the image must be filtered out
        visibility = image_filter.float()

        # normalize using label bbox 2d
        label_boxes_2d_xywh = geometry_utils.torch_xyxy_to_xywh(
            label_boxes_2d.unsqueeze(0)).squeeze(0)
        # shape(N, 4, 2)
        label_corners_4c = geometry_utils.torch_xyxy_to_corner_4c(
            label_boxes_2d.unsqueeze(0)).squeeze(0)
        wh = label_boxes_2d_xywh[:, 2:].unsqueeze(1).unsqueeze(1)
        corners_2d = corners_2d.unsqueeze(2)
        label_corners_4c = label_corners_4c.unsqueeze(1)
        encoded_corners_2d = (corners_2d - label_corners_4c) / wh

        dist = torch.norm(encoded_corners_2d, dim=-1)  # (N,8,4)
        dist_min, dist_argmin = dist.min(dim=-1)  # (N,8)
        # one-hot score marking, for each projected corner, the nearest
        # corner of the 2d bbox
        corners_2d_scores = torch.zeros_like(dist)
        corners_2d_scores = corners_2d_scores.view(-1, 4)
        col_index = dist_argmin.view(-1)
        row_index = torch.arange(col_index.numel()).type_as(col_index)
        corners_2d_scores[row_index, col_index] = 1
        corners_2d_scores = corners_2d_scores.view(-1, 8, 4)
        visibility = visibility.unsqueeze(-1) * corners_2d_scores

        N = encoded_corners_2d.shape[0]
        return torch.cat([
            encoded_corners_2d.contiguous().view(N, -1),
            visibility.view(N, -1),
            dist_argmin.float().view(N, -1)
        ],
                         dim=-1)
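torch_xyxy_to_corner_4c above expands each 2d box to its four corners. A sketch of the assumed conversion (the corner order is a guess):

import torch

def xyxy_to_corner_4c(boxes):
    """Sketch: (N, 4) boxes in (x1, y1, x2, y2) form -> (N, 4, 2) corners,
    ordered left-top, right-top, right-bottom, left-bottom (assumed)."""
    x1, y1, x2, y2 = boxes.unbind(-1)
    return torch.stack([
        torch.stack([x1, y1], dim=-1),
        torch.stack([x2, y1], dim=-1),
        torch.stack([x2, y2], dim=-1),
        torch.stack([x1, y2], dim=-1),
    ], dim=1)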
Example #10
    def encode(label_boxes_3d, proposals, p2, image_info, label_boxes_2d):
        """
            projection points of 3d bbox center and its corners_3d in local
            coordinates frame

        Returns:
            depth of center:
            center 3d location:
            local_corners:
        """
        num_instances = label_boxes_3d.shape[0]
        # project the 3d corners into the image
        corners_2d = geometry_utils.torch_boxes_3d_to_corners_2d(
            label_boxes_3d, p2)

        proposals_xywh = geometry_utils.torch_xyxy_to_xywh(
            proposals.unsqueeze(0)).squeeze(0)
        wh = proposals_xywh[:, 2:].unsqueeze(1)
        xy = proposals_xywh[:, :2].unsqueeze(1)

        corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(
            label_boxes_3d)
        bottom_corners_3d = corners_3d[:, [0, 1, 2, 3]]
        visible_index = Corner3DCoder.find_visible_side(bottom_corners_3d)
        visible_corners_3d = tensor_utils.multidim_index(
            bottom_corners_3d, visible_index)
        visible_side_line_2d = geometry_utils.torch_points_3d_to_points_2d(
            visible_corners_3d.contiguous().view(-1, 3),
            p2).view(num_instances, -1, 2)
        visible_cond = (
            visible_side_line_2d[:, 1, 0] - visible_side_line_2d[:, 0, 0]
        ) * (visible_side_line_2d[:, 2, 0] - visible_side_line_2d[:, 0, 0]) < 0

        _, order = torch.sort(visible_side_line_2d[..., 0],
                              dim=-1,
                              descending=False)
        visible_index = tensor_utils.multidim_index(
            visible_index.unsqueeze(-1), order).squeeze(-1)

        bottom_corners = corners_2d[:, [0, 1, 2, 3]]
        top_corners = corners_2d[:, [4, 5, 6, 7]]
        bottom_corners = tensor_utils.multidim_index(bottom_corners,
                                                     visible_index)
        top_corners = tensor_utils.multidim_index(top_corners, visible_index)
        bottom_corners_3d = tensor_utils.multidim_index(
            bottom_corners_3d, visible_index)
        dist = torch.norm(bottom_corners_3d, dim=-1)
        merge_left_cond = dist[:, 0] < dist[:, 2]

        # clamp truncated corners to the 2d box
        # bottom face, left corner
        bottom_corners[:, 0, 0] = torch.min(bottom_corners[:, 0, 0],
                                            label_boxes_2d[:, 2])
        bottom_corners[:, 0, 0] = torch.max(bottom_corners[:, 0, 0],
                                            label_boxes_2d[:, 0])

        # right
        bottom_corners[:, 2, 0] = torch.min(bottom_corners[:, 2, 0],
                                            label_boxes_2d[:, 2])
        bottom_corners[:, 2, 0] = torch.max(bottom_corners[:, 2, 0],
                                            label_boxes_2d[:, 0])

        # top
        top_corners[:, 0, 0] = torch.min(top_corners[:, 0, 0],
                                         label_boxes_2d[:, 2])
        top_corners[:, 0, 0] = torch.max(top_corners[:, 0, 0],
                                         label_boxes_2d[:, 0])

        top_corners[:, 2, 0] = torch.min(top_corners[:, 2, 0],
                                         label_boxes_2d[:, 2])
        top_corners[:, 2, 0] = torch.max(top_corners[:, 2, 0],
                                         label_boxes_2d[:, 0])

        in_box_cond = (bottom_corners[:, 1, 0] < label_boxes_2d[:, 2]) & (
            bottom_corners[:, 1, 0] > label_boxes_2d[:, 0])


        visibility = visible_cond.float() * in_box_cond.float()
        # merge the corners of boxes whose side is not visible
        index = torch.nonzero(visibility <= 0).view(-1)
        tmp = bottom_corners[index]
        merge_left_cond = merge_left_cond[index]
        merge_right_cond = ~merge_left_cond
        tmp_left = torch.stack([tmp[:, 0], tmp[:, 0], tmp[:, 2]], dim=1)
        tmp_right = torch.stack([tmp[:, 0], tmp[:, 2], tmp[:, 2]], dim=1)
        tmp[merge_left_cond] = tmp_left[merge_left_cond]
        tmp[merge_right_cond] = tmp_right[merge_right_cond]
        bottom_corners[index] = tmp

        tmp = top_corners[index]
        tmp_left = torch.stack([tmp[:, 0], tmp[:, 0], tmp[:, 2]], dim=1)
        tmp_right = torch.stack([tmp[:, 0], tmp[:, 2], tmp[:, 2]], dim=1)
        tmp[merge_left_cond] = tmp_left[merge_left_cond]
        tmp[merge_right_cond] = tmp_right[merge_right_cond]
        top_corners[index] = tmp

        # encode
        encoded_bottom_corners = (bottom_corners - xy) / wh
        encoded_heights = (bottom_corners[..., 1] -
                           top_corners[..., 1]) / wh[..., 1]

        mid_x = bottom_corners[:, 1, 0]
        ratio = (mid_x - proposals[:, 0]) / wh[:, 0, 0]
        ratio = ratio.clamp(min=0, max=1)


        return torch.cat([
            encoded_bottom_corners.contiguous().view(num_instances, -1),
            encoded_heights.contiguous().view(num_instances, -1),
            ratio.view(num_instances, -1)
        ],
                         dim=-1)
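tensor_utils.multidim_index is used throughout this example to reorder corners per instance. A hedged sketch of the assumed gather semantics:

import torch

def multidim_index(tensor, index):
    """Sketch: tensor (N, M, C), index (N, K) -> (N, K, C), selecting the
    index[i] rows from tensor[i] (assumed to match tensor_utils)."""
    idx = index.long().unsqueeze(-1).expand(-1, -1, tensor.shape[-1])
    return torch.gather(tensor, 1, idx)

For instance, multidim_index(bottom_corners_3d, visible_index) would pick, for every box, the bottom corners on its visible side.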