Example no. 1
def bm_face_areas_normals() -> None:
    kwargs_list = []
    backend_cuda = [False]
    if torch.cuda.is_available():
        backend_cuda.append(True)

    num_meshes = [2, 10, 32]
    num_verts = [100, 1000]
    num_faces = [300, 3000]

    test_cases = product(num_meshes, num_verts, num_faces, backend_cuda)
    for case in test_cases:
        n, v, f, c = case
        kwargs_list.append(
            {"num_meshes": n, "num_verts": v, "num_faces": f, "cuda": c}
        )
    benchmark(
        TestFaceAreasNormals.face_areas_normals_with_init,
        "FACE_AREAS_NORMALS",
        kwargs_list,
        warmup_iters=1,
    )

    benchmark(
        TestFaceAreasNormals.face_areas_normals_with_init_torch,
        "FACE_AREAS_NORMALS_TORCH",
        kwargs_list,
        warmup_iters=1,
    )


def bm_point_mesh_distance() -> None:

    backend = ["cuda:0"]
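    # Note: unlike most examples here, CUDA availability is not checked; this sweep is GPU-only.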

    kwargs_list = []
    batch_size = [4, 8, 16]
    num_verts = [100, 1000]
    num_faces = [300, 3000]
    num_points = [5000, 10000]
    test_cases = product(batch_size, num_verts, num_faces, num_points, backend)
    for case in test_cases:
        n, v, f, p, b = case
        kwargs_list.append({"N": n, "V": v, "F": f, "P": p, "device": b})

    benchmark(
        TestPointMeshDistance.point_mesh_edge,
        "POINT_MESH_EDGE",
        kwargs_list,
        warmup_iters=1,
    )

    benchmark(
        TestPointMeshDistance.point_mesh_face,
        "POINT_MESH_FACE",
        kwargs_list,
        warmup_iters=1,
    )
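
Every example in this collection follows the same pattern: enumerate configurations with itertools.product, collect them into a kwargs_list of dicts, and hand the list to a shared benchmark() helper together with a label and a warm-up count. That helper is not shown here; the sketch below is a hypothetical stand-in (the real project typically imports one, e.g. fvcore's) that only illustrates the calling convention implied by these examples: each target(**kwargs) call is expected to return a zero-argument closure, which the driver warms up and then times.

import time
from typing import Callable, Dict, List


def benchmark(
    target: Callable,
    label: str,
    kwargs_list: List[Dict],
    *,
    num_iters: int = 10,
    warmup_iters: int = 0,
) -> None:
    # Hypothetical driver: build one closure per configuration, warm it up,
    # then report the mean wall-clock time per iteration.
    for kwargs in kwargs_list:
        bench = target(**kwargs)
        for _ in range(warmup_iters):
            bench()
        start = time.perf_counter()
        for _ in range(num_iters):
            bench()
        elapsed = (time.perf_counter() - start) / num_iters
        print(f"{label} {kwargs}: {elapsed * 1e3:.3f} ms / iter")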
Example no. 3
def benchmark_rotated_iou():
    num_boxes1 = 200
    num_boxes2 = 500
    boxes1 = torch.stack([
        torch.tensor([5 + 20 * i, 5 + 20 * i, 10, 10, 0], dtype=torch.float32)
        for i in range(num_boxes1)
    ])
    boxes2 = torch.stack([
        torch.tensor(
            [5 + 20 * i, 5 + 20 * i, 10, 1 + 9 * i / num_boxes2, 0],
            dtype=torch.float32,
        ) for i in range(num_boxes2)
    ])

    def func(dev, n=1):
        b1 = boxes1.to(device=dev)
        b2 = boxes2.to(device=dev)

        def bench():
            for _ in range(n):
                pairwise_iou_rotated(b1, b2)
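            # CUDA kernels launch asynchronously; synchronize so the timing covers the queued work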
            if dev.type == "cuda":
                torch.cuda.synchronize()

        return bench

    # only run it once per timed loop, since it's slow
    args = [{"dev": torch.device("cpu"), "n": 1}]
    if torch.cuda.is_available():
        args.append({"dev": torch.device("cuda"), "n": 10})

    benchmark(func, "rotated_iou", args, warmup_iters=3)
Example no. 4
def bm_blending() -> None:
    devices = ["cuda"]
    kwargs_list = []
    num_meshes = [8]
    image_size = [64, 128, 256]
    faces_per_pixel = [50, 100]
    backend = ["pytorch", "custom"]
    test_cases = product(num_meshes, image_size, faces_per_pixel, devices, backend)

    for case in test_cases:
        n, s, k, d, b = case
        kwargs_list.append(
            {
                "num_meshes": n,
                "image_size": s,
                "faces_per_pixel": k,
                "device": d,
                "backend": b,
            }
        )

    benchmark(
        TestBlending.bm_sigmoid_alpha_blending,
        "SIGMOID_ALPHA_BLENDING_PYTORCH",
        kwargs_list,
        warmup_iters=1,
    )

    kwargs_list = [case for case in kwargs_list if case["backend"] == "pytorch"]
    benchmark(
        TestBlending.bm_softmax_blending,
        "SOFTMAX_BLENDING_PYTORCH",
        kwargs_list,
        warmup_iters=1,
    )
Example no. 5
def bm_graph_conv() -> None:
    backends = ['cpu']
    if torch.cuda.is_available():
        backends.append('cuda')

    kwargs_list = []
    gconv_dim = [128, 256]
    num_meshes = [32, 64]
    num_verts = [100]
    num_faces = [1000]
    directed = [False, True]
    test_cases = product(gconv_dim, num_meshes, num_verts, num_faces, directed,
                         backends)
    for case in test_cases:
        g, n, v, f, d, b = case
        kwargs_list.append({
            'gconv_dim': g,
            'num_meshes': n,
            'num_verts': v,
            'num_faces': f,
            'directed': d,
            'backend': b,
        })
    benchmark(
        TestGraphConv.graph_conv_forward_backward,
        'GRAPH CONV',
        kwargs_list,
        warmup_iters=1,
    )
Example no. 6
def bm_chamfer() -> None:
    kwargs_list_naive = [
        {"batch_size": 1, "P1": 32, "P2": 64, "return_normals": False},
        {"batch_size": 1, "P1": 32, "P2": 64, "return_normals": True},
        {"batch_size": 32, "P1": 32, "P2": 64, "return_normals": False},
    ]
    benchmark(
        TestChamfer.chamfer_naive_with_init,
        "CHAMFER_NAIVE",
        kwargs_list_naive,
        warmup_iters=1,
    )

    if torch.cuda.is_available():
        kwargs_list = kwargs_list_naive + [
            {"batch_size": 1, "P1": 1000, "P2": 3000, "return_normals": False},
            {"batch_size": 1, "P1": 1000, "P2": 30000, "return_normals": True},
        ]
        benchmark(
            TestChamfer.chamfer_with_init,
            "CHAMFER",
            kwargs_list,
            warmup_iters=1,
        )
Example no. 7
def bm_graph_conv() -> None:
    backends = ["cpu"]
    if torch.cuda.is_available():
        backends.append("cuda")

    kwargs_list = []
    gconv_dim = [128, 256]
    num_meshes = [32, 64]
    num_verts = [100]
    num_faces = [1000]
    directed = [False, True]
    test_cases = product(gconv_dim, num_meshes, num_verts, num_faces, directed,
                         backends)
    for case in test_cases:
        g, n, v, f, d, b = case
        kwargs_list.append({
            "gconv_dim": g,
            "num_meshes": n,
            "num_verts": v,
            "num_faces": f,
            "directed": d,
            "backend": b,
        })
    benchmark(
        TestGraphConv.graph_conv_forward_backward,
        "GRAPH CONV",
        kwargs_list,
        warmup_iters=1,
    )
Example no. 8
def bm_knn() -> None:

    backends = ["cpu", "cuda:0"]
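    # Both backends are swept unconditionally; CUDA availability is not checked before adding "cuda:0".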

    kwargs_list = []
    Ns = [32]
    P1s = [256]
    P2s = [128, 512]
    Ds = [3]
    Ks = [24]
    test_cases = product(Ns, P1s, P2s, Ds, Ks, backends)
    for case in test_cases:
        N, P1, P2, D, K, b = case
        kwargs_list.append({
            "N": N,
            "P1": P1,
            "P2": P2,
            "D": D,
            "K": K,
            "device": b
        })

    benchmark(TestKNN.knn_square, "KNN_SQUARE", kwargs_list, warmup_iters=1)

    benchmark(TestKNN.knn_ragged, "KNN_RAGGED", kwargs_list, warmup_iters=1)


def bm_sample_points() -> None:

    backend = ["cpu"]
    if torch.cuda.is_available():
        backend.append("cuda:0")
    kwargs_list = []
    num_meshes = [2, 10, 32]
    num_verts = [100, 1000]
    num_faces = [300, 3000]
    num_samples = [5000, 10000]
    test_cases = product(num_meshes, num_verts, num_faces, num_samples,
                         backend)
    for case in test_cases:
        n, v, f, s, b = case
        kwargs_list.append({
            "num_meshes": n,
            "num_verts": v,
            "num_faces": f,
            "num_samples": s,
            "device": b,
        })
    benchmark(
        TestSamplePoints.sample_points_with_init,
        "SAMPLE_MESH",
        kwargs_list,
        warmup_iters=1,
    )
Example no. 10
def bm_marching_cubes() -> None:
    kwargs_list = [
        {"batch_size": 1, "V": 5},
        {"batch_size": 1, "V": 10},
        {"batch_size": 1, "V": 20},
        {"batch_size": 1, "V": 40},
        {"batch_size": 5, "V": 5},
        {"batch_size": 20, "V": 20},
    ]
    benchmark(
        TestMarchingCubes.marching_cubes_with_init,
        "MARCHING_CUBES",
        kwargs_list,
        warmup_iters=1,
    )
Example no. 11
def bm_compute_packed_padded_meshes() -> None:
    devices = ["cpu"]
    if torch.cuda.is_available():
        devices.append("cuda")

    kwargs_list = []
    num_meshes = [32, 128]
    max_v = [100, 1000, 10000]
    max_f = [300, 3000, 30000]
    test_cases = product(num_meshes, max_v, max_f, devices)
    for case in test_cases:
        n, v, f, d = case
        kwargs_list.append({
            "num_meshes": n,
            "max_v": v,
            "max_f": f,
            "device": d
        })
    benchmark(
        TestMeshes.compute_packed_with_init,
        "COMPUTE_PACKED",
        kwargs_list,
        warmup_iters=1,
    )
    benchmark(
        TestMeshes.compute_padded_with_init,
        "COMPUTE_PADDED",
        kwargs_list,
        warmup_iters=1,
    )


def bm_interpolate_face_attribues() -> None:
    # For now only benchmark on GPU
    if not torch.cuda.is_available():
        return

    Ns = [1, 4]
    Ss = [128]
    Ks = [1, 10, 40]
    Fs = [5000]
    Ds = [1, 3, 16]
    impls = ["python", "cuda"]
    test_cases = product(Ns, Ss, Ks, Fs, Ds, impls)
    kwargs_list = []
    for case in test_cases:
        N, S, K, F, D, impl = case
        kwargs_list.append({
            "N": N,
            "S": S,
            "K": K,
            "F": F,
            "D": D,
            "impl": impl
        })
    benchmark(_bm_forward, "FORWARD", kwargs_list, warmup_iters=3)
    benchmark(_bm_forward_backward,
              "FORWARD+BACKWARD",
              kwargs_list,
              warmup_iters=3)
Example no. 13
def bm_ball_query() -> None:

    backends = ["cpu", "cuda:0"]

    kwargs_list = []
    Ns = [32]
    P1s = [256]
    P2s = [128, 512]
    Ds = [3, 10]
    Ks = [3, 24, 100]
    Rs = [0.1, 0.2, 5]
    test_cases = product(Ns, P1s, P2s, Ds, Ks, Rs, backends)
    for case in test_cases:
        N, P1, P2, D, K, R, b = case
        kwargs_list.append({
            "N": N,
            "P1": P1,
            "P2": P2,
            "D": D,
            "K": K,
            "radius": R,
            "device": b
        })

    benchmark(TestBallQuery.ball_query_square,
              "BALLQUERY_SQUARE",
              kwargs_list,
              warmup_iters=1)
    benchmark(TestBallQuery.ball_query_ragged,
              "BALLQUERY_RAGGED",
              kwargs_list,
              warmup_iters=1)
Example no. 14
def bm_vert_align() -> None:
    devices = ["cpu"]
    if torch.cuda.is_available():
        devices.append("cuda")

    kwargs_list = []
    num_meshes = [2, 10, 32]
    num_verts = [100, 1000]
    num_faces = [300, 3000]
    test_cases = product(num_meshes, num_verts, num_faces, devices)
    for case in test_cases:
        n, v, f, d = case
        kwargs_list.append({
            "num_meshes": n,
            "num_verts": v,
            "num_faces": f,
            "device": d
        })

    benchmark(
        TestVertAlign.vert_align_with_init,
        "VERT_ALIGN",
        kwargs_list,
        warmup_iters=1,
    )
Example no. 15
def bm_blending() -> None:
    devices = ["cpu", "cuda"]
    kwargs_list = []
    num_meshes = [16]
    image_size = [128, 256]
    faces_per_pixel = [50, 100]
    test_cases = product(num_meshes, image_size, faces_per_pixel, devices)

    for case in test_cases:
        n, s, k, d = case
        kwargs_list.append({
            "num_meshes": n,
            "image_size": s,
            "faces_per_pixel": k,
            "device": d
        })

    benchmark(
        TestBlending.bm_sigmoid_alpha_blending,
        "SIGMOID_ALPHA_BLENDING_PYTORCH",
        kwargs_list,
        warmup_iters=1,
    )

    benchmark(
        TestBlending.bm_softmax_blending,
        "SOFTMAX_BLENDING_PYTORCH",
        kwargs_list,
        warmup_iters=1,
    )
Example no. 16
def bm_compute_packed_padded_meshes() -> None:
    devices = ['cpu']
    if torch.cuda.is_available():
        devices.append('cuda')

    kwargs_list = []
    num_meshes = [32, 128]
    max_v = [100, 1000, 10000]
    max_f = [300, 3000, 30000]
    test_cases = product(num_meshes, max_v, max_f, devices)
    for case in test_cases:
        n, v, f, d = case
        kwargs_list.append(
            {'num_meshes': n, 'max_v': v, 'max_f': f, 'device': d}
        )
    benchmark(
        TestMeshes.compute_packed_with_init,
        'COMPUTE_PACKED',
        kwargs_list,
        warmup_iters=1,
    )
    benchmark(
        TestMeshes.compute_padded_with_init,
        'COMPUTE_PADDED',
        kwargs_list,
        warmup_iters=1,
    )


def benchmark_paste():
    S = 800
    H, W = image_shape = (S, S)
    N = 64
    torch.manual_seed(42)
    masks = torch.rand(N, 28, 28)

    center = torch.rand(N, 2) * 600 + 100
    wh = torch.clamp(torch.randn(N, 2) * 40 + 200, min=50)
    x0y0 = torch.clamp(center - wh * 0.5, min=0.0)
    x1y1 = torch.clamp(center + wh * 0.5, max=S)
    boxes = Boxes(torch.cat([x0y0, x1y1], axis=1))

    def func(device, n=3):
        m = masks.to(device=device)
        b = boxes.to(device=device)

        def bench():
            for _ in range(n):
                paste_masks_in_image(m, b, image_shape)
            if device.type == "cuda":
                torch.cuda.synchronize()

        return bench

    specs = [{"device": torch.device("cpu"), "n": 3}]
    if torch.cuda.is_available():
        specs.append({"device": torch.device("cuda"), "n": 3})

    benchmark(func, "paste_masks", specs, num_iters=10, warmup_iters=2)


def bm_sample_points() -> None:

    backend = ['cpu']
    if torch.cuda.is_available():
        backend.append('cuda:0')
    kwargs_list = []
    num_meshes = [2, 10, 32]
    num_verts = [100, 1000]
    num_faces = [300, 3000]
    num_samples = [5000, 10000]
    test_cases = product(num_meshes, num_verts, num_faces, num_samples, backend)
    for case in test_cases:
        n, v, f, s, b = case
        kwargs_list.append(
            {
                'num_meshes': n,
                'num_verts': v,
                'num_faces': f,
                'num_samples': s,
                'device': b,
            }
        )
    benchmark(
        TestSamplePoints.sample_points_with_init,
        'SAMPLE_MESH',
        kwargs_list,
        warmup_iters=1,
    )
Example no. 19
def bm_cubify() -> None:
    kwargs_list = [
        {"batch_size": 32, "V": 16},
        {"batch_size": 64, "V": 16},
        {"batch_size": 16, "V": 32},
    ]
    benchmark(
        TestCubify.cubify_with_init, "CUBIFY", kwargs_list, warmup_iters=1
    )
Example no. 20
def bm_so3() -> None:
    kwargs_list = [
        {"batch_size": 1},
        {"batch_size": 10},
        {"batch_size": 100},
        {"batch_size": 1000},
    ]
    benchmark(TestSO3.so3_expmap, "SO3_EXP", kwargs_list, warmup_iters=1)
    benchmark(TestSO3.so3_logmap, "SO3_LOG", kwargs_list, warmup_iters=1)
Example no. 21
def benchmark_roi_align():
    def random_boxes(mean_box, stdev, N, maxsize):
        ret = torch.rand(N, 4) * stdev + torch.tensor(mean_box,
                                                      dtype=torch.float)
        ret.clamp_(min=0, max=maxsize)
        return ret

    def func(shape, nboxes_per_img, sampling_ratio, device, box_size="large"):
        N, _, H, _ = shape
        input = torch.rand(*shape)
        boxes = []
        batch_idx = []
        for k in range(N):
            if box_size == "large":
                b = random_boxes([80, 80, 130, 130], 24, nboxes_per_img, H)
            else:
                b = random_boxes([100, 100, 110, 110], 4, nboxes_per_img, H)
            boxes.append(b)
            batch_idx.append(
                torch.zeros(nboxes_per_img, 1, dtype=torch.float32) + k)
        boxes = torch.cat(boxes, axis=0)
        batch_idx = torch.cat(batch_idx, axis=0)
        boxes = torch.cat([batch_idx, boxes], axis=1)

        input = input.to(device=device)
        boxes = boxes.to(device=device)

        def bench():
            if False and sampling_ratio > 0 and N == 1:
                # enable to benchmark grid_sample (slower)
                grid_sample_roi_align(input, boxes[:, 1:], 7, 1.0,
                                      sampling_ratio)
            else:
                roi_align(input, boxes, 7, 1.0, sampling_ratio, True)
            if device == "cuda":
                torch.cuda.synchronize()

        return bench

    def gen_args(arg):
        args = []
        for size in ["small", "large"]:
            for ratio in [0, 2]:
                args.append(copy(arg))
                args[-1]["sampling_ratio"] = ratio
                args[-1]["box_size"] = size
        return args

    arg = dict(shape=(1, 512, 256, 256), nboxes_per_img=512, device="cuda")
    benchmark(func,
              "cuda_roialign",
              gen_args(arg),
              num_iters=20,
              warmup_iters=1)
    arg.update({"device": "cpu", "shape": (1, 256, 128, 128)})
    benchmark(func, "cpu_roialign", gen_args(arg), num_iters=5, warmup_iters=1)
Example no. 22
def bm_faiss_exact(fnames):
    Ns = [1, 2, 4, 8]
    Ks = [1, 2, 4, 8, 16, 32]
    test_cases = product(fnames, Ns, Ks)
    kwargs_list = []

    for case in test_cases:
        fname, N, K = case
        kwargs_list.append({"fname": fname.split("/")[-1], "N": N, "K": K})

    benchmark(Compare.faiss_exact, "faiss_exact", kwargs_list, warmup_iters=1)
Example no. 23
def bm_mesh_edge_loss() -> None:
    kwargs_list = []
    num_meshes = [1, 16, 32]
    max_v = [100, 10000]
    max_f = [300, 30000]
    test_cases = product(num_meshes, max_v, max_f)
    for case in test_cases:
        n, v, f = case
        kwargs_list.append({"num_meshes": n, "max_v": v, "max_f": f})
    benchmark(
        TestMeshEdgeLoss.mesh_edge_loss, "MESH_EDGE_LOSS", kwargs_list, warmup_iters=1
    )
Example no. 24
def bm_chamfer() -> None:
    devices = ['cpu']
    if torch.cuda.is_available():
        devices.append('cuda:0')

    kwargs_list_naive = []
    batch_size = [1, 32]
    return_normals = [True, False]
    test_cases = product(batch_size, return_normals, devices)

    for case in test_cases:
        b, n, d = case
        kwargs_list_naive.append({
            'batch_size': b,
            'P1': 32,
            'P2': 64,
            'return_normals': n,
            'device': d,
        })

    benchmark(
        TestChamfer.chamfer_naive_with_init,
        'CHAMFER_NAIVE',
        kwargs_list_naive,
        warmup_iters=1,
    )

    if torch.cuda.is_available():
        device = 'cuda:0'
        kwargs_list = []
        batch_size = [1, 32]
        P1 = [32, 1000, 10000]
        P2 = [64, 3000, 30000]
        return_normals = [True, False]
        homogeneous = [True, False]
        test_cases = product(batch_size, P1, P2, return_normals, homogeneous)

        for case in test_cases:
            b, p1, p2, n, h = case
            kwargs_list.append({
                'batch_size': b,
                'P1': p1,
                'P2': p2,
                'return_normals': n,
                'homogeneous': h,
                'device': device,
            })
        benchmark(
            TestChamfer.chamfer_with_init,
            'CHAMFER',
            kwargs_list,
            warmup_iters=1,
        )
Example no. 25
def bm_chamfer() -> None:
    # Currently disabled.
    return
    devices = ["cpu"]
    if torch.cuda.is_available():
        devices.append("cuda:0")

    kwargs_list_naive = []
    batch_size = [1, 32]
    return_normals = [True, False]
    test_cases = product(batch_size, return_normals, devices)

    for case in test_cases:
        b, n, d = case
        kwargs_list_naive.append({
            "batch_size": b,
            "P1": 32,
            "P2": 64,
            "return_normals": n,
            "device": d
        })

    benchmark(
        TestChamfer.chamfer_naive_with_init,
        "CHAMFER_NAIVE",
        kwargs_list_naive,
        warmup_iters=1,
    )

    if torch.cuda.is_available():
        device = "cuda:0"
        kwargs_list = []
        batch_size = [1, 32]
        P1 = [32, 1000, 10000]
        P2 = [64, 3000, 30000]
        return_normals = [True, False]
        homogeneous = [True, False]
        test_cases = product(batch_size, P1, P2, return_normals, homogeneous)

        for case in test_cases:
            b, p1, p2, n, h = case
            kwargs_list.append({
                "batch_size": b,
                "P1": p1,
                "P2": p2,
                "return_normals": n,
                "homogeneous": h,
                "device": device,
            })
        benchmark(TestChamfer.chamfer_with_init,
                  "CHAMFER",
                  kwargs_list,
                  warmup_iters=1)
Example no. 26
def bm_acos_linear_extrapolation() -> None:
    kwargs_list = [
        {"batch_size": 1},
        {"batch_size": 100},
        {"batch_size": 10000},
        {"batch_size": 1000000},
    ]
    benchmark(
        TestAcosLinearExtrapolation.acos_linear_extrapolation,
        "ACOS_LINEAR_EXTRAPOLATION",
        kwargs_list,
        warmup_iters=1,
    )
Example no. 27
def bm_subdivide() -> None:
    kwargs_list = []
    num_meshes = [1, 16, 32]
    same_topo = [True, False]
    test_cases = product(num_meshes, same_topo)
    for case in test_cases:
        n, s = case
        kwargs_list.append({"num_meshes": n, "same_topo": s})
    benchmark(
        TestSubdivideMeshes.subdivide_meshes_with_init,
        "SUBDIVIDE",
        kwargs_list,
        warmup_iters=1,
    )
Example no. 28
def bm_save_load() -> None:
    kwargs_list = [
        {"V": 100, "F": 300},
        {"V": 1000, "F": 3000},
        {"V": 10000, "F": 30000},
    ]
    benchmark(
        TestMeshObjIO.load_obj_with_init,
        "LOAD_OBJ",
        kwargs_list,
        warmup_iters=1,
    )
    benchmark(
        TestMeshObjIO.save_obj_with_init,
        "SAVE_OBJ",
        kwargs_list,
        warmup_iters=1,
    )
    benchmark(TestMeshPlyIO.load_ply_bm,
              "LOAD_PLY",
              kwargs_list,
              warmup_iters=1)
    benchmark(TestMeshPlyIO.save_ply_bm,
              "SAVE_PLY",
              kwargs_list,
              warmup_iters=1)
Example no. 29
def bm_chamfer() -> None:
    kwargs_list_naive = [
        {"batch_size": 1, "P1": 32, "P2": 64, "return_normals": False},
        {"batch_size": 1, "P1": 32, "P2": 64, "return_normals": True},
        {"batch_size": 32, "P1": 32, "P2": 64, "return_normals": False},
    ]
    benchmark(
        TestChamfer.chamfer_naive_with_init,
        "CHAMFER_NAIVE",
        kwargs_list_naive,
        warmup_iters=1,
    )

    if torch.cuda.is_available():
        kwargs_list = []
        batch_size = [1, 32]
        P1 = [32, 1000, 10000]
        P2 = [64, 3000, 30000]
        return_normals = [True, False]
        homogeneous = [True, False]
        test_cases = product(batch_size, P1, P2, return_normals, homogeneous)

        for case in test_cases:
            b, p1, p2, n, h = case
            kwargs_list.append({
                "batch_size": b,
                "P1": p1,
                "P2": p2,
                "return_normals": n,
                "homogeneous": h,
            })
        benchmark(TestChamfer.chamfer_with_init,
                  "CHAMFER",
                  kwargs_list,
                  warmup_iters=1)


def bm_mesh_rasterizer_transform() -> None:
    if torch.cuda.is_available():
        kwargs_list = []
        num_meshes = [1, 8]
        ico_level = [0, 1, 3, 4]
        test_cases = product(num_meshes, ico_level)
        for case in test_cases:
            n, ic = case
            kwargs_list.append({'num_meshes': n, 'ico_level': ic})
        benchmark(
            rasterize_transform_with_init,
            'MESH_RASTERIZER',
            kwargs_list,
            warmup_iters=1,
        )