def bm_face_areas_normals() -> None:
    """Benchmark the custom face-areas/normals op against the pure-torch version."""
    use_cuda = [False]
    if torch.cuda.is_available():
        use_cuda.append(True)
    kwargs_list = [
        {"num_meshes": n, "num_verts": v, "num_faces": f, "cuda": c}
        for n, v, f, c in product(
            [2, 10, 32], [100, 1000], [300, 3000], use_cuda
        )
    ]
    benchmark(
        TestFaceAreasNormals.face_areas_normals_with_init,
        "FACE_AREAS_NORMALS",
        kwargs_list,
        warmup_iters=1,
    )
    benchmark(
        TestFaceAreasNormals.face_areas_normals_with_init_torch,
        "FACE_AREAS_NORMALS_TORCH",
        kwargs_list,
        warmup_iters=1,
    )
def bm_point_mesh_distance() -> None:
    """Benchmark point-to-mesh edge and face distance kernels (CUDA only).

    The ops under test run on "cuda:0"; on a CPU-only host we skip the
    benchmark instead of crashing when the tensors are moved to the GPU.
    """
    # Fix: previously the "cuda:0" device was used unconditionally, which
    # raised at runtime on machines without CUDA. Guard like the other
    # CUDA-only benchmarks in this file.
    if not torch.cuda.is_available():
        return
    backend = ["cuda:0"]
    kwargs_list = []
    batch_size = [4, 8, 16]
    num_verts = [100, 1000]
    num_faces = [300, 3000]
    num_points = [5000, 10000]
    test_cases = product(batch_size, num_verts, num_faces, num_points, backend)
    for case in test_cases:
        n, v, f, p, b = case
        kwargs_list.append({"N": n, "V": v, "F": f, "P": p, "device": b})
    benchmark(
        TestPointMeshDistance.point_mesh_edge,
        "POINT_MESH_EDGE",
        kwargs_list,
        warmup_iters=1,
    )
    benchmark(
        TestPointMeshDistance.point_mesh_face,
        "POINT_MESH_FACE",
        kwargs_list,
        warmup_iters=1,
    )
def benchmark_rotated_iou():
    """Benchmark pairwise rotated-box IoU on CPU, and on GPU when available."""
    count_a, count_b = 200, 500
    set_a = torch.stack(
        [
            torch.tensor([5 + 20 * i, 5 + 20 * i, 10, 10, 0], dtype=torch.float32)
            for i in range(count_a)
        ]
    )
    set_b = torch.stack(
        [
            torch.tensor(
                [5 + 20 * i, 5 + 20 * i, 10, 1 + 9 * i / count_b, 0],
                dtype=torch.float32,
            )
            for i in range(count_b)
        ]
    )

    def func(dev, n=1):
        # Move inputs once; only the IoU call is timed inside bench().
        first = set_a.to(device=dev)
        second = set_b.to(device=dev)

        def bench():
            for _ in range(n):
                pairwise_iou_rotated(first, second)
            if dev.type == "cuda":
                torch.cuda.synchronize()

        return bench

    # only run it once per timed loop, since it's slow
    args = [{"dev": torch.device("cpu"), "n": 1}]
    if torch.cuda.is_available():
        args.append({"dev": torch.device("cuda"), "n": 10})
    benchmark(func, "rotated_iou", args, warmup_iters=3)
def bm_blending() -> None:
    """Benchmark sigmoid-alpha and softmax blending (CUDA only).

    Sigmoid blending is run for both the "pytorch" and "custom" backends;
    softmax blending only has a pytorch path, so the case list is filtered
    down to the "pytorch" backend before the second benchmark.
    """
    # Fix: devices was hard-coded to ["cuda"], which crashes on CPU-only
    # machines; skip instead, consistent with the other GPU-only benchmarks.
    if not torch.cuda.is_available():
        return
    devices = ["cuda"]
    kwargs_list = []
    num_meshes = [8]
    image_size = [64, 128, 256]
    faces_per_pixel = [50, 100]
    backend = ["pytorch", "custom"]
    test_cases = product(num_meshes, image_size, faces_per_pixel, devices, backend)
    for case in test_cases:
        n, s, k, d, b = case
        kwargs_list.append(
            {
                "num_meshes": n,
                "image_size": s,
                "faces_per_pixel": k,
                "device": d,
                "backend": b,
            }
        )
    benchmark(
        TestBlending.bm_sigmoid_alpha_blending,
        "SIGMOID_ALPHA_BLENDING_PYTORCH",
        kwargs_list,
        warmup_iters=1,
    )
    # Softmax blending has no custom backend.
    kwargs_list = [case for case in kwargs_list if case["backend"] == "pytorch"]
    benchmark(
        TestBlending.bm_softmax_blending,
        "SOFTMAX_BLENDING_PYTORCH",
        kwargs_list,
        warmup_iters=1,
    )
def bm_graph_conv() -> None:
    """Benchmark graph convolution forward+backward across devices."""
    device_list = ["cpu"]
    if torch.cuda.is_available():
        device_list.append("cuda")
    kwargs_list = [
        {
            "gconv_dim": dim,
            "num_meshes": meshes,
            "num_verts": verts,
            "num_faces": faces,
            "directed": is_directed,
            "backend": dev,
        }
        for dim, meshes, verts, faces, is_directed, dev in product(
            [128, 256], [32, 64], [100], [1000], [False, True], device_list
        )
    ]
    benchmark(
        TestGraphConv.graph_conv_forward_backward,
        "GRAPH CONV",
        kwargs_list,
        warmup_iters=1,
    )
def bm_chamfer() -> None:
    """Benchmark naive chamfer distance everywhere, full chamfer on GPU only."""
    naive_cases = [
        {"batch_size": 1, "P1": 32, "P2": 64, "return_normals": False},
        {"batch_size": 1, "P1": 32, "P2": 64, "return_normals": True},
        {"batch_size": 32, "P1": 32, "P2": 64, "return_normals": False},
    ]
    benchmark(
        TestChamfer.chamfer_naive_with_init,
        "CHAMFER_NAIVE",
        naive_cases,
        warmup_iters=1,
    )
    if not torch.cuda.is_available():
        return
    # The optimized op additionally handles much larger clouds.
    large_cases = naive_cases + [
        {"batch_size": 1, "P1": 1000, "P2": 3000, "return_normals": False},
        {"batch_size": 1, "P1": 1000, "P2": 30000, "return_normals": True},
    ]
    benchmark(
        TestChamfer.chamfer_with_init,
        "CHAMFER",
        large_cases,
        warmup_iters=1,
    )
def bm_graph_conv() -> None:
    """Benchmark graph-conv forward/backward over dims, mesh counts and devices."""
    targets = ["cpu"]
    if torch.cuda.is_available():
        targets.append("cuda")
    cases = []
    combos = product([128, 256], [32, 64], [100], [1000], [False, True], targets)
    for dim, n_mesh, n_vert, n_face, directed_flag, dev in combos:
        cases.append(
            {
                "gconv_dim": dim,
                "num_meshes": n_mesh,
                "num_verts": n_vert,
                "num_faces": n_face,
                "directed": directed_flag,
                "backend": dev,
            }
        )
    benchmark(
        TestGraphConv.graph_conv_forward_backward,
        "GRAPH CONV",
        cases,
        warmup_iters=1,
    )
def bm_knn() -> None:
    """Benchmark KNN on square and ragged point clouds for CPU and GPU.

    The GPU backend is only included when CUDA is available, so the
    benchmark no longer crashes on CPU-only machines.
    """
    # Fix: "cuda:0" was added unconditionally; guard it like the other
    # benchmarks in this file (e.g. bm_sample_points, bm_graph_conv).
    backends = ["cpu"]
    if torch.cuda.is_available():
        backends.append("cuda:0")
    kwargs_list = []
    Ns = [32]
    P1s = [256]
    P2s = [128, 512]
    Ds = [3]
    Ks = [24]
    test_cases = product(Ns, P1s, P2s, Ds, Ks, backends)
    for case in test_cases:
        N, P1, P2, D, K, b = case
        kwargs_list.append(
            {"N": N, "P1": P1, "P2": P2, "D": D, "K": K, "device": b}
        )
    benchmark(TestKNN.knn_square, "KNN_SQUARE", kwargs_list, warmup_iters=1)
    benchmark(TestKNN.knn_ragged, "KNN_RAGGED", kwargs_list, warmup_iters=1)
def bm_sample_points() -> None:
    """Benchmark sampling points from mesh surfaces across devices."""
    targets = ["cpu"]
    if torch.cuda.is_available():
        targets.append("cuda:0")
    kwargs_list = [
        {
            "num_meshes": n_mesh,
            "num_verts": n_vert,
            "num_faces": n_face,
            "num_samples": n_sample,
            "device": dev,
        }
        for n_mesh, n_vert, n_face, n_sample, dev in product(
            [2, 10, 32], [100, 1000], [300, 3000], [5000, 10000], targets
        )
    ]
    benchmark(
        TestSamplePoints.sample_points_with_init,
        "SAMPLE_MESH",
        kwargs_list,
        warmup_iters=1,
    )
def bm_marching_cubes() -> None:
    """Benchmark marching cubes over a range of batch sizes and volume sizes."""
    cases = [
        {"batch_size": bs, "V": vol}
        for bs, vol in [(1, 5), (1, 10), (1, 20), (1, 40), (5, 5), (20, 20)]
    ]
    benchmark(
        TestMarchingCubes.marching_cubes_with_init,
        "MARCHING_CUBES",
        cases,
        warmup_iters=1,
    )
def bm_compute_packed_padded_meshes() -> None:
    """Benchmark packed and padded mesh representation construction."""
    device_names = ["cpu"]
    if torch.cuda.is_available():
        device_names.append("cuda")
    kwargs_list = [
        {"num_meshes": meshes, "max_v": verts, "max_f": faces, "device": dev}
        for meshes, verts, faces, dev in product(
            [32, 128], [100, 1000, 10000], [300, 3000, 30000], device_names
        )
    ]
    benchmark(
        TestMeshes.compute_packed_with_init,
        "COMPUTE_PACKED",
        kwargs_list,
        warmup_iters=1,
    )
    benchmark(
        TestMeshes.compute_padded_with_init,
        "COMPUTE_PADDED",
        kwargs_list,
        warmup_iters=1,
    )
def bm_interpolate_face_attribues() -> None:
    """Benchmark face-attribute interpolation, python vs cuda implementations.

    NOTE: the misspelling "attribues" in the name is kept — callers use it.
    """
    # For now only benchmark on GPU
    if not torch.cuda.is_available():
        return
    combos = product(
        [1, 4],          # N: batch sizes
        [128],           # S: image sizes
        [1, 10, 40],     # K: faces per pixel
        [5000],          # F: face counts
        [1, 3, 16],      # D: attribute dims
        ["python", "cuda"],
    )
    kwargs_list = [
        {"N": N, "S": S, "K": K, "F": F, "D": D, "impl": impl}
        for N, S, K, F, D, impl in combos
    ]
    benchmark(_bm_forward, "FORWARD", kwargs_list, warmup_iters=3)
    benchmark(_bm_forward_backward, "FORWARD+BACKWARD", kwargs_list, warmup_iters=3)
def bm_ball_query() -> None:
    """Benchmark ball query on square and ragged point clouds.

    The GPU backend is only included when CUDA is available, so the
    benchmark no longer crashes on CPU-only machines.
    """
    # Fix: "cuda:0" was added unconditionally; guard it like the other
    # benchmarks in this file.
    backends = ["cpu"]
    if torch.cuda.is_available():
        backends.append("cuda:0")
    kwargs_list = []
    Ns = [32]
    P1s = [256]
    P2s = [128, 512]
    Ds = [3, 10]
    Ks = [3, 24, 100]
    Rs = [0.1, 0.2, 5]
    test_cases = product(Ns, P1s, P2s, Ds, Ks, Rs, backends)
    for case in test_cases:
        N, P1, P2, D, K, R, b = case
        kwargs_list.append(
            {"N": N, "P1": P1, "P2": P2, "D": D, "K": K, "radius": R, "device": b}
        )
    benchmark(
        TestBallQuery.ball_query_square,
        "BALLQUERY_SQUARE",
        kwargs_list,
        warmup_iters=1,
    )
    benchmark(
        TestBallQuery.ball_query_ragged,
        "BALLQUERY_RAGGED",
        kwargs_list,
        warmup_iters=1,
    )
def bm_vert_align() -> None:
    """Benchmark vertex alignment across mesh sizes and devices."""
    device_names = ["cpu"]
    if torch.cuda.is_available():
        device_names.append("cuda")
    cases = [
        {"num_meshes": meshes, "num_verts": verts, "num_faces": faces, "device": dev}
        for meshes, verts, faces, dev in product(
            [2, 10, 32], [100, 1000], [300, 3000], device_names
        )
    ]
    benchmark(
        TestVertAlign.vert_align_with_init,
        "VERT_ALIGN",
        cases,
        warmup_iters=1,
    )
def bm_blending() -> None:
    """Benchmark sigmoid-alpha and softmax blending on CPU and (optionally) GPU.

    The "cuda" device is only included when CUDA is available, so the
    benchmark no longer crashes on CPU-only machines.
    """
    # Fix: devices was hard-coded to ["cpu", "cuda"]; guard the GPU entry
    # like the other benchmarks in this file.
    devices = ["cpu"]
    if torch.cuda.is_available():
        devices.append("cuda")
    kwargs_list = []
    num_meshes = [16]
    image_size = [128, 256]
    faces_per_pixel = [50, 100]
    test_cases = product(num_meshes, image_size, faces_per_pixel, devices)
    for case in test_cases:
        n, s, k, d = case
        kwargs_list.append(
            {"num_meshes": n, "image_size": s, "faces_per_pixel": k, "device": d}
        )
    benchmark(
        TestBlending.bm_sigmoid_alpha_blending,
        "SIGMOID_ALPHA_BLENDING_PYTORCH",
        kwargs_list,
        warmup_iters=1,
    )
    benchmark(
        TestBlending.bm_softmax_blending,
        "SOFTMAX_BLENDING_PYTORCH",
        kwargs_list,
        warmup_iters=1,
    )
def bm_compute_packed_padded_meshes() -> None:
    """Benchmark construction of packed and padded mesh tensors."""
    targets = ['cpu']
    if torch.cuda.is_available():
        targets.append('cuda')
    combos = product([32, 128], [100, 1000, 10000], [300, 3000, 30000], targets)
    cases = []
    for mesh_count, vert_cap, face_cap, dev in combos:
        cases.append(
            {
                'num_meshes': mesh_count,
                'max_v': vert_cap,
                'max_f': face_cap,
                'device': dev,
            }
        )
    benchmark(
        TestMeshes.compute_packed_with_init,
        'COMPUTE_PACKED',
        cases,
        warmup_iters=1,
    )
    benchmark(
        TestMeshes.compute_padded_with_init,
        'COMPUTE_PADDED',
        cases,
        warmup_iters=1,
    )
def benchmark_paste():
    """Benchmark pasting instance masks into a full image, CPU and GPU."""
    S = 800
    H, W = image_shape = (S, S)
    N = 64
    torch.manual_seed(42)
    masks = torch.rand(N, 28, 28)
    # Random box centers and sizes, clipped to stay inside the image.
    center = torch.rand(N, 2) * 600 + 100
    wh = torch.clamp(torch.randn(N, 2) * 40 + 200, min=50)
    x0y0 = torch.clamp(center - wh * 0.5, min=0.0)
    x1y1 = torch.clamp(center + wh * 0.5, max=S)
    boxes = Boxes(torch.cat([x0y0, x1y1], axis=1))

    def func(device, n=3):
        # Move fixtures once; only paste_masks_in_image is timed.
        dev_masks = masks.to(device=device)
        dev_boxes = boxes.to(device=device)

        def bench():
            for _ in range(n):
                paste_masks_in_image(dev_masks, dev_boxes, image_shape)
            if device.type == "cuda":
                torch.cuda.synchronize()

        return bench

    specs = [{"device": torch.device("cpu"), "n": 3}]
    if torch.cuda.is_available():
        specs.append({"device": torch.device("cuda"), "n": 3})
    benchmark(func, "paste_masks", specs, num_iters=10, warmup_iters=2)
def bm_sample_points() -> None:
    """Benchmark sampling point clouds from meshes on available devices."""
    targets = ['cpu']
    if torch.cuda.is_available():
        targets.append('cuda:0')
    combos = product(
        [2, 10, 32], [100, 1000], [300, 3000], [5000, 10000], targets
    )
    cases = [
        {
            'num_meshes': mesh_count,
            'num_verts': vert_count,
            'num_faces': face_count,
            'num_samples': sample_count,
            'device': dev,
        }
        for mesh_count, vert_count, face_count, sample_count, dev in combos
    ]
    benchmark(
        TestSamplePoints.sample_points_with_init,
        'SAMPLE_MESH',
        cases,
        warmup_iters=1,
    )
def bm_cubify() -> None:
    """Benchmark voxel-grid cubify for a few batch/volume combinations."""
    cases = [
        {"batch_size": bs, "V": vol}
        for bs, vol in [(32, 16), (64, 16), (16, 32)]
    ]
    benchmark(TestCubify.cubify_with_init, "CUBIFY", cases, warmup_iters=1)
def bm_so3() -> None:
    """Benchmark SO(3) exponential and logarithm maps over batch sizes."""
    cases = [{"batch_size": bs} for bs in (1, 10, 100, 1000)]
    benchmark(TestSO3.so3_expmap, "SO3_EXP", cases, warmup_iters=1)
    benchmark(TestSO3.so3_logmap, "SO3_LOG", cases, warmup_iters=1)
def benchmark_roi_align():
    """Benchmark ROIAlign forward for GPU and CPU inputs."""

    def random_boxes(mean_box, stdev, N, maxsize):
        # Jitter N boxes around mean_box and clip to the image extent.
        jittered = torch.rand(N, 4) * stdev + torch.tensor(
            mean_box, dtype=torch.float
        )
        jittered.clamp_(min=0, max=maxsize)
        return jittered

    def func(shape, nboxes_per_img, sampling_ratio, device, box_size="large"):
        N, _, H, _ = shape
        input = torch.rand(*shape)
        per_image = []
        indices = []
        for img_idx in range(N):
            if box_size == "large":
                bxs = random_boxes([80, 80, 130, 130], 24, nboxes_per_img, H)
            else:
                bxs = random_boxes([100, 100, 110, 110], 4, nboxes_per_img, H)
            per_image.append(bxs)
            indices.append(
                torch.zeros(nboxes_per_img, 1, dtype=torch.float32) + img_idx
            )
        boxes = torch.cat(per_image, axis=0)
        batch_idx = torch.cat(indices, axis=0)
        # Prepend the batch index column expected by roi_align.
        boxes = torch.cat([batch_idx, boxes], axis=1)
        input = input.to(device=device)
        boxes = boxes.to(device=device)

        def bench():
            if False and sampling_ratio > 0 and N == 1:
                # enable to benchmark grid_sample (slower)
                grid_sample_roi_align(input, boxes[:, 1:], 7, 1.0, sampling_ratio)
            else:
                roi_align(input, boxes, 7, 1.0, sampling_ratio, True)
            if device == "cuda":
                torch.cuda.synchronize()

        return bench

    def gen_args(arg):
        # Expand one base spec into the small/large x ratio 0/2 grid.
        variants = []
        for size in ["small", "large"]:
            for ratio in [0, 2]:
                variants.append(copy(arg))
                variants[-1]["sampling_ratio"] = ratio
                variants[-1]["box_size"] = size
        return variants

    arg = dict(shape=(1, 512, 256, 256), nboxes_per_img=512, device="cuda")
    benchmark(func, "cuda_roialign", gen_args(arg), num_iters=20, warmup_iters=1)
    arg.update({"device": "cpu", "shape": (1, 256, 128, 128)})
    benchmark(func, "cpu_roialign", gen_args(arg), num_iters=5, warmup_iters=1)
def bm_faiss_exact(fnames):
    """Benchmark exact FAISS search over the given files for various N and K."""
    cases = [
        {"fname": path.split("/")[-1], "N": n, "K": k}
        for path, n, k in product(fnames, [1, 2, 4, 8], [1, 2, 4, 8, 16, 32])
    ]
    benchmark(Compare.faiss_exact, "faiss_exact", cases, warmup_iters=1)
def bm_mesh_edge_loss() -> None:
    """Benchmark the mesh edge-length regularization loss."""
    cases = [
        {"num_meshes": mesh_count, "max_v": vert_cap, "max_f": face_cap}
        for mesh_count, vert_cap, face_cap in product(
            [1, 16, 32], [100, 10000], [300, 30000]
        )
    ]
    benchmark(
        TestMeshEdgeLoss.mesh_edge_loss,
        "MESH_EDGE_LOSS",
        cases,
        warmup_iters=1,
    )
def bm_chamfer() -> None:
    """Benchmark naive chamfer on all devices and the full op on GPU."""
    targets = ['cpu']
    if torch.cuda.is_available():
        targets.append('cuda:0')
    naive_cases = [
        {
            'batch_size': bs,
            'P1': 32,
            'P2': 64,
            'return_normals': normals,
            'device': dev,
        }
        for bs, normals, dev in product([1, 32], [True, False], targets)
    ]
    benchmark(
        TestChamfer.chamfer_naive_with_init,
        'CHAMFER_NAIVE',
        naive_cases,
        warmup_iters=1,
    )
    if not torch.cuda.is_available():
        return
    gpu = 'cuda:0'
    full_cases = [
        {
            'batch_size': bs,
            'P1': p1,
            'P2': p2,
            'return_normals': normals,
            'homogeneous': homog,
            'device': gpu,
        }
        for bs, p1, p2, normals, homog in product(
            [1, 32],
            [32, 1000, 10000],
            [64, 3000, 30000],
            [True, False],
            [True, False],
        )
    ]
    benchmark(
        TestChamfer.chamfer_with_init,
        'CHAMFER',
        full_cases,
        warmup_iters=1,
    )
def bm_chamfer() -> None:
    """Benchmark chamfer distance. Currently disabled: returns immediately."""
    return
    devices = ["cpu"]
    if torch.cuda.is_available():
        devices.append("cuda:0")
    naive_cases = []
    for bs, normals, dev in product([1, 32], [True, False], devices):
        naive_cases.append(
            {
                "batch_size": bs,
                "P1": 32,
                "P2": 64,
                "return_normals": normals,
                "device": dev,
            }
        )
    benchmark(
        TestChamfer.chamfer_naive_with_init,
        "CHAMFER_NAIVE",
        naive_cases,
        warmup_iters=1,
    )
    if torch.cuda.is_available():
        gpu = "cuda:0"
        full_cases = []
        combos = product(
            [1, 32],
            [32, 1000, 10000],
            [64, 3000, 30000],
            [True, False],
            [True, False],
        )
        for bs, p1, p2, normals, homog in combos:
            full_cases.append(
                {
                    "batch_size": bs,
                    "P1": p1,
                    "P2": p2,
                    "return_normals": normals,
                    "homogeneous": homog,
                    "device": gpu,
                }
            )
        benchmark(
            TestChamfer.chamfer_with_init, "CHAMFER", full_cases, warmup_iters=1
        )
def bm_acos_linear_extrapolation() -> None:
    """Benchmark the linearly-extrapolated arccos over growing batch sizes."""
    cases = [{"batch_size": bs} for bs in (1, 100, 10000, 1000000)]
    benchmark(
        TestAcosLinearExtrapolation.acos_linear_extrapolation,
        "ACOS_LINEAR_EXTRAPOLATION",
        cases,
        warmup_iters=1,
    )
def bm_subdivide() -> None:
    """Benchmark mesh subdivision with shared and distinct topologies."""
    cases = [
        {"num_meshes": mesh_count, "same_topo": shared}
        for mesh_count, shared in product([1, 16, 32], [True, False])
    ]
    benchmark(
        TestSubdivideMeshes.subdivide_meshes_with_init,
        "SUBDIVIDE",
        cases,
        warmup_iters=1,
    )
def bm_save_load() -> None:
    """Benchmark OBJ and PLY mesh load/save at several mesh sizes."""
    cases = [
        {"V": verts, "F": faces}
        for verts, faces in [(100, 300), (1000, 3000), (10000, 30000)]
    ]
    benchmark(TestMeshObjIO.load_obj_with_init, "LOAD_OBJ", cases, warmup_iters=1)
    benchmark(TestMeshObjIO.save_obj_with_init, "SAVE_OBJ", cases, warmup_iters=1)
    benchmark(TestMeshPlyIO.load_ply_bm, "LOAD_PLY", cases, warmup_iters=1)
    benchmark(TestMeshPlyIO.save_ply_bm, "SAVE_PLY", cases, warmup_iters=1)
def bm_chamfer() -> None:
    """Benchmark naive chamfer everywhere; full chamfer only when CUDA exists."""
    naive_cases = [
        {"batch_size": 1, "P1": 32, "P2": 64, "return_normals": False},
        {"batch_size": 1, "P1": 32, "P2": 64, "return_normals": True},
        {"batch_size": 32, "P1": 32, "P2": 64, "return_normals": False},
    ]
    benchmark(
        TestChamfer.chamfer_naive_with_init,
        "CHAMFER_NAIVE",
        naive_cases,
        warmup_iters=1,
    )
    if not torch.cuda.is_available():
        return
    full_cases = [
        {
            "batch_size": bs,
            "P1": p1,
            "P2": p2,
            "return_normals": normals,
            "homogeneous": homog,
        }
        for bs, p1, p2, normals, homog in product(
            [1, 32],
            [32, 1000, 10000],
            [64, 3000, 30000],
            [True, False],
            [True, False],
        )
    ]
    benchmark(
        TestChamfer.chamfer_with_init, "CHAMFER", full_cases, warmup_iters=1
    )
def bm_mesh_rasterizer_transform() -> None:
    """Benchmark mesh rasterizer transforms (GPU only; no-op without CUDA)."""
    if not torch.cuda.is_available():
        return
    cases = [
        {'num_meshes': mesh_count, 'ico_level': level}
        for mesh_count, level in product([1, 8], [0, 1, 3, 4])
    ]
    benchmark(
        rasterize_transform_with_init,
        'MESH_RASTERIZER',
        cases,
        warmup_iters=1,
    )