def calculate_2d_bbox(row, view_matrix, proj_matrix, width, height):
    """Return the 2D box enclosing the projected 3D bounding box of ``row``.

    The 3D box corners obtained from ``get_model_3dbbox`` are pushed through
    the model, view and projection matrices and divided by their fourth
    homogeneous component. Corners whose x and y both lie in ``[-1, 1]`` are
    kept. If any projected coordinate falls outside that range, the points
    where the box edges cross the image borders are added as well.

    Args:
        row: Mapping providing the ``'pos'``, ``'rot'`` and ``'model_sizes'``
            entries; each one is converted with ``np.array``.
        view_matrix: Matrix applied after the model matrix.
        proj_matrix: Matrix applied after ``view_matrix``.
        width: Image width, used for the border segments and for
            ``pixel_to_ndc``.
        height: Image height, used for the border segments and for
            ``pixel_to_ndc``.

    Returns:
        A nested list ``[[x_max, -y_min], [x_min, -y_max]]`` in which every
        value has been rescaled from ``[-1, 1]`` to ``[0, 1]``.

    NOTE(review): if no corner survives the range filter and no edge/border
    intersection is found, the final reductions run on an empty array and
    NumPy raises ``ValueError`` - confirm callers expect that.
    """

    def _as_line(segment):
        # Wrap a pair of 2D endpoints into a Line made of two Points.
        start, end = segment
        return Line(Point(start[0], start[1]), Point(end[0], end[1]))

    position = np.array(row['pos'])
    rotation = np.array(row['rot'])
    corners_model = get_model_3dbbox(np.array(row['model_sizes']))

    # Homogeneous coordinates, one column per corner: rows are x, y, z, 1.
    homogeneous = np.array(
        [corners_model[:, axis] for axis in range(3)]
        + [np.ones_like(corners_model[:, 0])]
    )

    # Same transform chain (and same grouping) as model -> view -> projection.
    in_world = construct_model_matrix(position, rotation) @ homogeneous
    in_view = view_matrix @ in_world
    in_clip = proj_matrix @ in_view
    in_clip /= in_clip[3, :]  # perspective divide, done in place

    # One row per corner, keeping only the x and y components.
    ndc_points = in_clip.T[:, 0:2]

    sticks_out = (ndc_points < -1).any() or (ndc_points > 1).any()
    inside_mask = ((ndc_points <= 1) & (ndc_points >= -1)).all(axis=1)
    ndc_points = ndc_points[inside_mask]

    if sticks_out:
        # Transposed so that each row can be indexed as one corner.
        # presumably these are pixel coordinates - verify against
        # model_coords_to_pixel.
        pixel_corners = model_coords_to_pixel(
            position, rotation, corners_model,
            view_matrix, proj_matrix, width, height,
        ).T

        # Corner index pairs forming the 12 edges of the 3D box.
        edge_indices = (
            (0, 1), (1, 3), (3, 2), (2, 0),
            (4, 5), (5, 7), (7, 6), (6, 4),
            (4, 0), (5, 1), (6, 2), (7, 3),
        )
        edges = [
            [pixel_corners[first, :], pixel_corners[second, :]]
            for first, second in edge_indices
        ]

        # The four image borders: x = 0, y = height, y = 0, x = width.
        borders = [
            [[0, 0], [0, height]],
            [[0, height], [width, height]],
            [[0, 0], [width, 0]],
            [[width, 0], [width, height]],
        ]

        for edge in edges:
            for border in borders:
                if not are_intersecting(edge, border):
                    continue
                crossing = _as_line(edge).intersect(_as_line(border))
                x, y = np.array(next(iter(crossing)), dtype=np.float32)
                # pixel_to_ndc takes and returns (y, x) ordering.
                ndc_y, ndc_x = pixel_to_ndc((y, x), (height, width))
                ndc_points = np.vstack((ndc_points, [ndc_x, ndc_y]))

    xs = ndc_points[:, 0]
    ys = ndc_points[:, 1]
    # The y values are negated before being stored.
    extremes = np.array([
        [xs.max(), -ys.min()],
        [xs.min(), -ys.max()],
    ])

    # Rescale from [-1, 1] to [0, 1].
    return ((extremes / 2) + 0.5).tolist()