def testSpMaxPool3d(self):
    """Compare the sparse max-pool test net with the dense max-pool reference.

    For every combination yielded by ``params_grid`` this checks, with
    ``atol=1e-4``, that

    * the gradient w.r.t. the sparse input features matches the dense
      reference gradient gathered at the input coordinates, and
    * the densified sparse output matches the dense reference output.
    """
    np.random.seed(485)
    grid = params_grid(
        ["cuda:0", "cpu:0"],  # devices
        [[19, 18, 17]],  # spatial shapes
        [1, 2],  # batch sizes
        [64],  # input channels
        [64],  # output channels
        [2, 3],  # kernel sizes
        [1, 2, 3],  # strides
        [0, 1],  # paddings
        [1, 2, 3],  # dilations
    )
    for (dev_name, spatial_shape, batch_size, in_ch, out_ch, ksize, stride,
         pad, dil) in grid:
        if stride > 1 and dil > 1:
            # Strided and dilated at the same time is not supported.
            continue
        device = torch.device(dev_name)
        points_per_sample = [1000] * batch_size

        # Features are drawn from [0.1, 1]: when the data contains negative
        # values, sparse max pooling is not equal to dense max pooling.
        sparse_dict = generate_sparse_data(
            spatial_shape, points_per_sample, in_ch, data_range=[0.1, 1])
        feats_np = np.ascontiguousarray(
            sparse_dict["features"]).astype(np.float32)
        # Column 3 of the generated indices is moved to the front.
        coords_np = np.ascontiguousarray(
            sparse_dict["indices"][:, [3, 0, 1, 2]]).astype(np.int32)
        dense_np = sparse_dict["features_dense"].astype(np.float32)

        # The pooling nets take no filters; this draw is kept only so the
        # NumPy random stream (and thus the gradient sample below) is
        # unchanged.
        np.random.uniform(0, 1, size=[ksize, ksize, ksize, in_ch, out_ch])

        coords_t = torch.from_numpy(coords_np).int().to(device)
        feats_t = torch.from_numpy(feats_np).to(device)
        feats_t.requires_grad = True
        dense_t = torch.from_numpy(dense_np).to(device)
        dense_t.requires_grad = True

        sparse_net = SparseMaxPoolTestTorch(
            1, 3, spatial_shape, ksize, stride, pad, dil).to(device)
        dense_net = MaxPool3dTestTorch(
            1, 3, spatial_shape, ksize, stride, pad, dil).to(device)

        ref_out = dense_net(dense_t)
        sparse_out = sparse_net(feats_t, coords_t, batch_size)
        out_coords = sparse_out.indices
        out_feats = sparse_out.features
        # Densify with channels last, then move the last axis to position 1
        # so the layout lines up with the dense reference output.
        sparse_dense = sparse_out.dense(channels_first=False)
        test_out = sparse_dense.permute(0, 4, 1, 2, 3).contiguous()

        # Draw an upstream gradient only at the active output locations and
        # scatter it into a dense tensor shared by both backward passes.
        grad_sparse_np = np.random.uniform(
            -0.2, 0.2, out_feats.shape).astype(feats_np.dtype)
        grad_sparse_t = torch.from_numpy(grad_sparse_np).to(device)
        grad_out = scatter_nd(
            out_coords.long(), grad_sparse_t, list(sparse_dense.shape))
        grad_out = grad_out.permute(0, 4, 1, 2, 3).contiguous()

        test_out.backward(grad_out)
        ref_out.backward(grad_out)

        # Move axis 1 of the dense gradient to the end before gathering it
        # at the input coordinates.
        ref_grad_dense = dense_t.grad.detach().permute(
            0, 2, 3, 4, 1).contiguous()
        ref_grad_at_points = gather_nd(ref_grad_dense, coords_t.long())
        self.assertAllClose(
            feats_t.grad.detach().cpu().numpy(),
            ref_grad_at_points.cpu().numpy(),
            atol=1e-4)

        self.assertAllClose(
            test_out.detach().cpu().numpy(),
            ref_out.detach().cpu().numpy(),
            atol=1e-4)
def testSpDeConv3d(self):
    """Compare the sparse deconvolution test net with the dense reference.

    Both nets are loaded with the same random filters. For every
    combination yielded by ``params_grid`` the input gradients, the weight
    gradients and the forward outputs are compared with ``atol=1e-4``.
    """
    np.random.seed(484)
    grid = params_grid(
        ["cuda:0", "cpu:0"],  # devices
        [[19, 18, 17]],  # spatial shapes
        [1, 2],  # batch sizes
        [64],  # input channels
        [32, 48, 64],  # output channels
        [2, 3],  # kernel sizes
        [2, 3],  # strides
        [0, 1, 2],  # paddings
        [1, 2, 3],  # dilations
    )
    for (dev_name, spatial_shape, batch_size, in_ch, out_ch, ksize, stride,
         pad, dil) in grid:
        if stride > 1 and dil > 1:
            # Strided and dilated at the same time is not supported.
            continue
        device = torch.device(dev_name)
        points_per_sample = [1000] * batch_size

        sparse_dict = generate_sparse_data(
            spatial_shape, points_per_sample, in_ch)
        feats_np = np.ascontiguousarray(
            sparse_dict["features"]).astype(np.float32)
        # Column 3 of the generated indices is moved to the front.
        coords_np = np.ascontiguousarray(
            sparse_dict["indices"][:, [3, 0, 1, 2]]).astype(np.int32)
        dense_np = sparse_dict["features_dense"].astype(np.float32)
        filters_np = np.random.uniform(
            0, 1,
            size=[ksize, ksize, ksize, in_ch, out_ch]).astype(np.float32)

        coords_t = torch.from_numpy(coords_np).int().to(device)
        feats_t = torch.from_numpy(feats_np).to(device)
        feats_t.requires_grad = True
        dense_t = torch.from_numpy(dense_np).to(device)
        dense_t.requires_grad = True

        sparse_net = SparseDeConv3dTestTorch(
            1, 3, spatial_shape, in_ch, out_ch, ksize, stride, pad,
            dil).to(device)
        dense_net = DeConv3dTestTorch(
            1, 3, spatial_shape, in_ch, out_ch, ksize, stride, pad,
            dil).to(device)

        # Same filters in both nets; the reference layer receives them with
        # the two trailing axes of the sparse layout moved to the front.
        filters_t = torch.from_numpy(filters_np).to(device)
        dense_net.net[0].weight.data[:] = filters_t.permute(
            3, 4, 0, 1, 2).contiguous()
        sparse_net.net[0].weight.data[:] = filters_t

        ref_out = dense_net(dense_t)
        test_out = sparse_net(feats_t, coords_t, batch_size).dense()

        # One shared upstream gradient drives both backward passes.
        grad_out_np = np.random.uniform(
            -0.2, 0.2, ref_out.shape).astype(feats_np.dtype)
        grad_out = torch.from_numpy(grad_out_np).to(device)
        test_out.backward(grad_out)
        ref_out.backward(grad_out)

        # Input gradients: gather the dense gradient at the input
        # coordinates after moving axis 1 to the end.
        ref_grad_dense = dense_t.grad.detach().permute(
            0, 2, 3, 4, 1).contiguous()
        ref_grad_at_points = gather_nd(ref_grad_dense, coords_t.long())
        self.assertAllClose(
            feats_t.grad.detach().cpu().numpy(),
            ref_grad_at_points.cpu().numpy(),
            atol=1e-4)

        # Weight gradients: bring the sparse layout into the reference one.
        for test_layer, ref_layer in zip(sparse_net.net, dense_net.net):
            test_dw = test_layer.weight.grad.detach().cpu().numpy()
            ref_dw = ref_layer.weight.grad.detach().cpu().numpy()
            self.assertAllClose(
                test_dw.transpose(3, 4, 0, 1, 2), ref_dw, atol=1e-4)

        self.assertAllClose(
            test_out.detach().cpu().numpy(),
            ref_out.detach().cpu().numpy(),
            atol=1e-4)
def main():
    """Development helper: time the sparse conv test net in half precision.

    Builds one configuration on ``cuda:0``, loads identical random filters
    into the sparse net and the dense reference net, runs the sparse
    forward pass 30 times, and prints

    * ``"spconv time"`` followed by the mean wall time of the runs after
      the first two, and
    * the norm of the difference between the densified sparse output and
      the dense reference output.

    Nothing is returned; results are only printed.
    """
    np.random.seed(484)
    devices = ["cuda:0"]
    shapes = [[50, 30, 30]]
    batchsizes = [3]
    in_channels = [256]
    out_channels = [256]
    ksizes = [3]
    strides = [1]
    paddings = [0]
    dilations = [1]
    for dev, shape, bs, IC, OC, k, s, p, d in params_grid(
            devices, shapes, batchsizes, in_channels, out_channels, ksizes,
            strides, paddings, dilations):
        if s > 1 and d > 1:
            continue
        device = torch.device(dev)
        num_points = [5000] * bs

        sparse_dict = generate_sparse_data(shape, num_points, IC)
        features = np.ascontiguousarray(
            sparse_dict["features"]).astype(np.float32)
        # Column 3 of the generated indices is moved to the front.
        indices = np.ascontiguousarray(
            sparse_dict["indices"][:, [3, 0, 1, 2]]).astype(np.int32)
        features_dense = sparse_dict["features_dense"].astype(np.float32)
        filters = np.random.uniform(
            0, 1, size=[k, k, k, IC, OC]).astype(np.float32)

        # Coordinates stay int32, as in the unit tests of this file; only
        # the floating-point data is converted to half precision (float16
        # cannot represent every integer above 2048).
        indices_t = torch.from_numpy(indices).int().to(device)
        features_t = torch.from_numpy(features).to(device).half()
        features_dense_t = torch.from_numpy(features_dense).to(device).half()

        net = SparseConv3dTestTorch(1, 3, shape, IC, OC, k, s, p,
                                    d).to(device).half()
        net_ref = Conv3dTestTorch(1, 3, shape, IC, OC, k, s, p,
                                  d).to(device).half()

        # Write through ``.data``: slice-assigning directly into a leaf
        # Parameter that requires grad raises a RuntimeError.
        filters_t = torch.from_numpy(filters).to(device).half()
        net_ref.net[0].weight.data[:] = filters_t.permute(
            4, 3, 0, 1, 2).contiguous()
        net.net[0].weight.data[:] = filters_t

        out_ref = net_ref(features_dense_t)

        times = []
        for _ in range(30):
            start = time.perf_counter()
            out = net(features_t, indices_t, bs)
            # Wait for the queued CUDA work so the interval covers it.
            torch.cuda.synchronize()
            times.append(time.perf_counter() - start)
        # The first two runs are treated as warm-up and left out.
        print("spconv time", np.mean(times[2:]))

        out = net(features_t, indices_t, bs).dense()
        print(
            np.linalg.norm(out.detach().cpu().numpy() -
                           out_ref.detach().cpu().numpy()))
def testSpCpConv3d(self):
    """Compare ``SparseCoupleDeConvTest`` with ``SCNCoupleDeConvTest``.

    The reference net receives a reshaped copy of the first two layers'
    weights of the net under test. For every combination yielded by
    ``params_grid`` the input gradients, the per-layer weight gradients
    and the forward outputs are compared with ``atol=1e-4``.
    """
    np.random.seed(484)
    devices = ["cuda:0", "cpu:0"]
    shapes = [[20, 20, 20]]
    batchsizes = [1, 2]
    in_channels = [64]
    out_channels = [32, 48, 64]
    ksizes = [2]
    strides = [2]
    # NOTE(review): paddings and dilations are defined here but are not
    # passed to params_grid below.
    paddings = [0, 1, 2]
    dilations = [1, 2, 3]
    grid = params_grid(devices, shapes, batchsizes, in_channels,
                       out_channels, ksizes, strides)
    for (dev_name, spatial_shape, batch_size, in_ch, out_ch, ksize,
         stride) in grid:
        device = torch.device(dev_name)
        points_per_sample = [1000] * batch_size

        sparse_dict = generate_sparse_data(
            spatial_shape, points_per_sample, in_ch)
        feats_np = np.ascontiguousarray(
            sparse_dict["features"]).astype(np.float32)
        # Column 3 of the generated indices is moved to the front.
        coords_np = np.ascontiguousarray(
            sparse_dict["indices"][:, [3, 0, 1, 2]]).astype(np.int32)
        # NOTE(review): the dense features and the random filters below are
        # not consumed by this test; the draw still advances the NumPy
        # random stream used for the gradient sample further down.
        dense_np = sparse_dict["features_dense"].astype(np.float32)
        filters_np = np.random.uniform(
            0, 1,
            size=[ksize, ksize, ksize, in_ch, out_ch]).astype(np.float32)

        coords_t = torch.from_numpy(coords_np).int().to(device)
        # The reference net gets the coordinates with the leading column
        # moved back to the end.
        coords_ref_t = torch.from_numpy(
            coords_np[:, [1, 2, 3, 0]]).int().to(device)
        feats_t = torch.from_numpy(feats_np).to(device)
        feats_t.requires_grad = True
        feats_ref_t = torch.from_numpy(feats_np).to(device)
        feats_ref_t.requires_grad = True

        ref_net = SCNCoupleDeConvTest(
            1, 3, spatial_shape, in_ch, out_ch, ksize, stride).to(device)
        test_net = SparseCoupleDeConvTest(
            1, 3, spatial_shape, in_ch, out_ch, ksize, stride).to(device)

        # Copy the weights of layers 0 and 1 into the reference net,
        # viewed in the reference layer's own weight shape.
        for layer_idx in (0, 1):
            ref_weight = ref_net.net[layer_idx].weight
            ref_weight.data[:] = test_net.net[layer_idx].weight.data[:].view(
                *ref_weight.shape)

        ref_out = ref_net(feats_ref_t, coords_ref_t, batch_size)
        test_out = test_net(feats_t, coords_t, batch_size)

        # One shared upstream gradient drives both backward passes.
        grad_out_np = np.random.uniform(
            -0.2, 0.2, ref_out.shape).astype(feats_np.dtype)
        grad_out = torch.from_numpy(grad_out_np).to(device)
        test_out.backward(grad_out)
        ref_out.backward(grad_out)

        self.assertAllClose(
            feats_ref_t.grad.detach().cpu().numpy(),
            feats_t.grad.detach().cpu().numpy(),
            atol=1e-4)

        for test_layer, ref_layer in zip(test_net.net, ref_net.net):
            test_dw = test_layer.weight.grad.detach().cpu().numpy()
            # View the reference gradient in the tested layer's shape.
            ref_dw = ref_layer.weight.grad.detach().cpu().view(
                *test_dw.shape).numpy()
            self.assertAllClose(test_dw, ref_dw, atol=1e-4)

        self.assertAllClose(
            test_out.detach().cpu().numpy(),
            ref_out.detach().cpu().numpy(),
            atol=1e-4)
def main_subm(algo, dtype=torch.float32):
    """Development helper: time the submanifold conv test net on ``cuda:0``.

    Runs the sparse forward pass 20 times, prints ``"spconv time"`` with
    the mean wall time of the last 10 runs, then prints the min, max, mean
    and sum of the densified output.

    Args:
        algo: Forwarded as ``algo=`` to ``SubMConv3dTestTorch``.
        dtype: Torch dtype applied to the features, the dense features,
            the filters and both nets. Coordinates are always int32.

    Returns:
        The densified output of the last benchmarked configuration as a
        NumPy array, or ``None`` if every configuration was skipped.
    """
    np.random.seed(484)
    torch.manual_seed(50051)
    devices = ["cuda:0"]
    shapes = [[400, 400, 15]]
    batchsizes = [2]
    in_channels = [32]
    out_channels = [64]
    ksizes = [(3, 3, 3)]
    strides = [1]
    paddings = [1]
    dilations = [1]
    # Defined up front so the final return never hits an unbound name.
    out_numpy = None
    for dev, shape, bs, IC, OC, k, s, p, d in params_grid(
            devices, shapes, batchsizes, in_channels, out_channels, ksizes,
            strides, paddings, dilations):
        if s > 1 and d > 1:
            continue
        device = torch.device(dev)
        num_points = [120000] * bs

        sparse_dict = generate_sparse_data(shape, num_points, IC)
        features = np.ascontiguousarray(
            sparse_dict["features"]).astype(np.float32)
        # Column 3 of the generated indices is moved to the front.
        indices = np.ascontiguousarray(
            sparse_dict["indices"][:, [3, 0, 1, 2]]).astype(np.int32)
        features_dense = sparse_dict["features_dense"].astype(np.float32)
        # The filters have singleton axes at positions 1 and 2.
        # NOTE(review): the slice assignments below presumably rely on
        # broadcasting over the remaining kernel axes -- confirm.
        filters = np.random.uniform(
            0, 1, size=[k[0], 1, 1, IC, OC]).astype(np.float32)

        # Coordinates stay int32, as in the unit tests of this file; only
        # floating-point data follows ``dtype``.
        indices_t = torch.from_numpy(indices).int().to(device)
        features_t = torch.from_numpy(features).to(device).to(dtype)
        features_dense_t = torch.from_numpy(features_dense).to(device).to(
            dtype)

        net = SubMConv3dTestTorch(1, 3, shape, IC, OC, k, s, p, d,
                                  algo=algo).to(device).to(dtype)
        net_ref = Conv3dTestTorch(1, 3, shape, IC, OC, k, s, p,
                                  d).to(device).to(dtype)

        # Write through ``.data``: slice-assigning directly into a leaf
        # Parameter that requires grad raises a RuntimeError.
        filters_t = torch.from_numpy(filters).to(device).to(dtype)
        net_ref.net[0].weight.data[:] = filters_t.permute(
            4, 3, 0, 1, 2).contiguous()
        net.net[0].weight.data[:] = filters_t

        # Dense reference forward pass; its result is currently not
        # compared against the sparse output.
        out_ref = net_ref(features_dense_t)

        times = []
        for _ in range(20):
            start = time.perf_counter()
            out = net(features_t, indices_t, bs)
            # Wait for the queued CUDA work so the interval covers it.
            torch.cuda.synchronize()
            times.append(time.perf_counter() - start)
        # The first ten runs are treated as warm-up and left out.
        print("spconv time", np.mean(times[10:]))

        out = net(features_t, indices_t, bs)
        out = out.dense()
        out_numpy = out.detach().cpu().numpy()
        print(out_numpy.min(), out_numpy.max(), out_numpy.mean(),
              out_numpy.sum())
    return out_numpy
def testSpConv3d(self):
    """Compare the sparse conv test net with the dense conv reference.

    Both nets are loaded with the same random filters, laid out according
    to the net's algorithm (and ``FILTER_HWIO`` for the native one). For
    every combination yielded by ``params_grid`` the forward outputs, the
    weight gradients and the input gradients are compared with
    ``atol=1e-4``.
    """
    np.random.seed(484)
    torch.manual_seed(48848)
    all_algos = [
        ConvAlgo.Native, ConvAlgo.MaskImplicitGemm,
        ConvAlgo.MaskSplitImplicitGemm
    ]
    # Only the split implicit-GEMM algorithm is exercised at the moment;
    # the full list above is not used by the grid.
    algos = [ConvAlgo.MaskSplitImplicitGemm]
    grid = params_grid(
        ["cuda:0"],  # devices
        [[19, 18, 17]],  # spatial shapes
        [1, 2],  # batch sizes
        [32],  # input channels
        [32, 48, 64],  # output channels
        [2, 3],  # kernel sizes
        [1, 2, 3],  # strides
        [0, 1, 2],  # paddings
        [1, 2, 3],  # dilations
        algos,
    )
    for (dev_name, spatial_shape, batch_size, in_ch, out_ch, ksize, stride,
         pad, dil, algo) in grid:
        if stride > 1 and dil > 1:
            # Strided and dilated at the same time is not supported.
            continue
        print(ksize, stride, pad, dil)
        device = torch.device(dev_name)
        points_per_sample = [1000] * batch_size
        dtype = torch.float32

        sparse_net = SparseConv3dTestTorch(
            1, 3, spatial_shape, in_ch, out_ch, ksize, stride, pad, dil,
            algo=algo).to(device).to(dtype)
        dense_net = Conv3dTestTorch(
            1, 3, spatial_shape, in_ch, out_ch, ksize, stride, pad,
            dil).to(device).to(dtype)

        sparse_dict = generate_sparse_data(
            spatial_shape, points_per_sample, in_ch)
        feats_np = np.ascontiguousarray(
            sparse_dict["features"]).astype(np.float32)
        # Column 3 of the generated indices is moved to the front.
        coords_np = np.ascontiguousarray(
            sparse_dict["indices"][:, [3, 0, 1, 2]]).astype(np.int32)
        dense_np = sparse_dict["features_dense"].astype(np.float32)

        coords_t = torch.from_numpy(coords_np).int().to(device)
        feats_t = torch.from_numpy(feats_np).to(device).to(dtype)
        feats_t.requires_grad = True
        dense_t = torch.from_numpy(dense_np).to(device).to(dtype)
        dense_t.requires_grad = True

        # Choose the filter layout of the sparse layer and the axis
        # permutation that converts it to the reference layer's layout.
        # The same permutation is applied to the weight gradients later.
        if sparse_net.algo == ConvAlgo.Native:
            if FILTER_HWIO:
                filter_shape = [ksize, ksize, ksize, in_ch, out_ch]
                to_ref_layout = (4, 3, 0, 1, 2)
            else:
                filter_shape = [ksize, ksize, ksize, out_ch, in_ch]
                to_ref_layout = (3, 4, 0, 1, 2)
        else:
            # OHWI -> OIHW
            filter_shape = [out_ch, ksize, ksize, ksize, in_ch]
            to_ref_layout = (0, 4, 1, 2, 3)

        filters_np = np.random.uniform(
            -1, 1, size=filter_shape).astype(np.float32)
        filters_t = torch.from_numpy(filters_np).to(device).to(dtype)
        dense_net.net[0].weight.data[:] = filters_t.permute(
            *to_ref_layout).contiguous()
        sparse_net.net[0].weight.data[:] = filters_t

        ref_out = dense_net(dense_t)
        test_out = sparse_net(feats_t, coords_t, batch_size).dense()
        self.assertAllClose(
            test_out.detach().cpu().numpy(),
            ref_out.detach().cpu().numpy(),
            atol=1e-4)

        # One shared upstream gradient drives both backward passes.
        grad_out_np = np.random.uniform(
            -0.2, 0.2, ref_out.shape).astype(feats_np.dtype)
        grad_out = torch.from_numpy(grad_out_np).to(device)
        test_out.backward(grad_out)
        ref_out.backward(grad_out)

        # Gather the dense input gradient at the input coordinates after
        # moving axis 1 to the end.
        ref_grad_dense = dense_t.grad.detach().permute(
            0, 2, 3, 4, 1).contiguous()
        ref_grad_at_points = gather_nd(ref_grad_dense, coords_t.long())
        test_grad_np = feats_t.grad.detach().cpu().numpy()
        ref_grad_np = ref_grad_at_points.cpu().numpy()

        for test_layer, ref_layer in zip(sparse_net.net, dense_net.net):
            test_dw = test_layer.weight.grad.detach().cpu().numpy()
            ref_dw = ref_layer.weight.grad.detach().cpu().numpy()
            self.assertAllClose(
                test_dw.transpose(*to_ref_layout), ref_dw, atol=1e-4)

        self.assertAllClose(test_grad_np, ref_grad_np, atol=1e-4)