예제 #1
0
    def testSpMaxPool3d(self):
        """Compare sparse 3D max pooling with the dense max-pool network.

        Every combination yielded by ``params_grid`` builds a
        ``SparseMaxPoolTestTorch`` and a ``MaxPool3dTestTorch``, feeds them
        the sparse and dense views of the same generated data, pushes one
        shared upstream gradient through both, and asserts that the input
        gradients and the densified outputs agree within ``atol=1e-4``.
        """
        np.random.seed(485)
        grid = params_grid(
            ["cuda:0", "cpu:0"],  # devices
            [[19, 18, 17]],  # spatial shapes
            [1, 2],  # batch sizes
            [64],  # input channels
            [64],  # output channels
            [2, 3],  # kernel sizes
            [1, 2, 3],  # strides
            [0, 1],  # paddings
            [1, 2, 3],  # dilations
        )
        for dev, shape, bs, IC, OC, k, s, p, d in grid:
            if s > 1 and d > 1:
                continue  # don't support this.
            device = torch.device(dev)
            # Features are drawn from [0.1, 1]: when the data contains
            # negative values, sparse max pooling is not equal to dense
            # max pooling.
            data = generate_sparse_data(shape, [1000] * bs, IC,
                                        data_range=[0.1, 1])

            feats_np = np.ascontiguousarray(data["features"]).astype(
                np.float32)
            # Move the 4th index column to the front (column order 3,0,1,2).
            coords_np = np.ascontiguousarray(
                data["indices"][:, [3, 0, 1, 2]]).astype(np.int32)
            dense_np = data["features_dense"].astype(np.float32)
            # The drawn values are not used by this test; the draw itself is
            # kept so the NumPy random stream feeding the upstream gradient
            # below stays the same.
            np.random.uniform(0, 1, size=[k, k, k, IC, OC])

            coords = torch.from_numpy(coords_np).int().to(device)
            sparse_in = torch.from_numpy(feats_np).to(device)
            sparse_in.requires_grad = True
            dense_in = torch.from_numpy(dense_np).to(device)
            dense_in.requires_grad = True
            pool = SparseMaxPoolTestTorch(1, 3, shape, k, s, p, d).to(device)
            pool_ref = MaxPool3dTestTorch(1, 3, shape, k, s, p, d).to(device)

            ref_out = pool_ref(dense_in)
            sparse_out = pool(sparse_in, coords, bs)
            out_coords = sparse_out.indices
            out_feats = sparse_out.features
            # Densify with channels last, then permute the last axis to
            # position 1 so the layout lines up with the reference output.
            dense_out = sparse_out.dense(channels_first=False)
            out = dense_out.permute(0, 4, 1, 2, 3).contiguous()

            # Random upstream gradient defined only at the active output
            # sites, scattered into a tensor shaped like the dense output.
            grad_vals = np.random.uniform(-0.2, 0.2, out_feats.shape).astype(
                feats_np.dtype)
            grad_vals_t = torch.from_numpy(grad_vals).to(device)
            upstream = scatter_nd(out_coords.long(), grad_vals_t,
                                  list(dense_out.shape))
            upstream = upstream.permute(0, 4, 1, 2, 3).contiguous()
            out.backward(upstream)
            ref_out.backward(upstream)

            # Read the dense input gradient back at the sparse coordinates
            # so it can be compared row by row with the sparse gradient.
            dense_grad = dense_in.grad.detach().permute(0, 2, 3, 4,
                                                        1).contiguous()
            expected_grad = gather_nd(dense_grad, coords.long())
            actual_grad = sparse_in.grad.detach()
            self.assertAllClose(actual_grad.cpu().numpy(),
                                expected_grad.cpu().numpy(),
                                atol=1e-4)

            self.assertAllClose(out.detach().cpu().numpy(),
                                ref_out.detach().cpu().numpy(),
                                atol=1e-4)
예제 #2
0
    def testSpDeConv3d(self):
        """Compare the sparse 3D deconvolution with the dense reference net.

        For each combination from ``params_grid`` both
        ``SparseDeConv3dTestTorch`` and ``DeConv3dTestTorch`` receive the
        same random kernel, run forward on the sparse/dense views of the
        same data, and are back-propagated with one shared upstream
        gradient. Input gradients, weight gradients and outputs must agree
        within ``atol=1e-4``.
        """
        np.random.seed(484)
        grid = params_grid(
            ["cuda:0", "cpu:0"],  # devices
            [[19, 18, 17]],  # spatial shapes
            [1, 2],  # batch sizes
            [64],  # input channels
            [32, 48, 64],  # output channels
            [2, 3],  # kernel sizes
            [2, 3],  # strides
            [0, 1, 2],  # paddings
            [1, 2, 3],  # dilations
        )
        for dev, shape, bs, IC, OC, k, s, p, d in grid:
            if s > 1 and d > 1:
                continue  # don't support this.
            device = torch.device(dev)
            data = generate_sparse_data(shape, [1000] * bs, IC)

            feats_np = np.ascontiguousarray(data["features"]).astype(
                np.float32)
            # Move the 4th index column to the front (column order 3,0,1,2).
            coords_np = np.ascontiguousarray(
                data["indices"][:, [3, 0, 1, 2]]).astype(np.int32)
            dense_np = data["features_dense"].astype(np.float32)
            kernel_np = np.random.uniform(0, 1, size=[k, k, k, IC,
                                                      OC]).astype(np.float32)

            coords = torch.from_numpy(coords_np).int().to(device)
            sparse_in = torch.from_numpy(feats_np).to(device)
            sparse_in.requires_grad = True
            dense_in = torch.from_numpy(dense_np).to(device)
            dense_in.requires_grad = True

            deconv = SparseDeConv3dTestTorch(1, 3, shape, IC, OC, k, s, p,
                                             d).to(device)
            deconv_ref = DeConv3dTestTorch(1, 3, shape, IC, OC, k, s, p,
                                           d).to(device)
            kernel = torch.from_numpy(kernel_np).to(device)
            # Both nets get the same kernel; the reference layer receives it
            # with the two channel axes moved in front of the spatial axes.
            deconv_ref.net[0].weight.data[:] = kernel.permute(
                3, 4, 0, 1, 2).contiguous()
            deconv.net[0].weight.data[:] = kernel

            ref_out = deconv_ref(dense_in)
            out = deconv(sparse_in, coords, bs).dense()

            upstream_np = np.random.uniform(-0.2, 0.2, ref_out.shape).astype(
                feats_np.dtype)
            upstream = torch.from_numpy(upstream_np).to(device)
            out.backward(upstream)
            ref_out.backward(upstream)

            # Read the dense input gradient back at the sparse coordinates.
            dense_grad = dense_in.grad.detach().permute(0, 2, 3, 4,
                                                        1).contiguous()
            expected_grad = gather_nd(dense_grad, coords.long())
            actual_grad = sparse_in.grad.detach()
            self.assertAllClose(actual_grad.cpu().numpy(),
                                expected_grad.cpu().numpy(),
                                atol=1e-4)

            for sparse_layer, ref_layer in zip(deconv.net, deconv_ref.net):
                w_grad = sparse_layer.weight.grad.detach().cpu().numpy()
                w_grad_ref = ref_layer.weight.grad.detach().cpu().numpy()
                # Same axis reordering as used when copying the kernel.
                w_grad = w_grad.transpose(3, 4, 0, 1, 2)
                self.assertAllClose(w_grad, w_grad_ref, atol=1e-4)

            self.assertAllClose(out.detach().cpu().numpy(),
                                ref_out.detach().cpu().numpy(),
                                atol=1e-4)
예제 #3
0
def main():
    """Developer benchmark for the half-precision sparse 3D convolution.

    Builds ``SparseConv3dTestTorch`` and ``Conv3dTestTorch`` on ``cuda:0``
    with one shared random kernel, times 30 forward passes of the sparse
    network, prints the mean of all but the first two timings, and finally
    prints the L2 norm of the difference between the densified sparse
    output and the dense reference output. Nothing is returned.
    """
    # function for develop.
    np.random.seed(484)
    devices = ["cuda:0"]
    shapes = [[50, 30, 30]]
    batchsizes = [3]

    in_channels = [256]
    out_channels = [256]
    ksizes = [3]
    strides = [1]
    paddings = [0]
    dilations = [1]

    for dev, shape, bs, IC, OC, k, s, p, d in params_grid(
            devices, shapes, batchsizes, in_channels, out_channels, ksizes,
            strides, paddings, dilations):
        if s > 1 and d > 1:
            continue
        device = torch.device(dev)
        num_points = [5000] * bs

        sparse_dict = generate_sparse_data(shape, num_points, IC)

        features = np.ascontiguousarray(sparse_dict["features"]).astype(
            np.float32)
        # Move the 4th index column to the front (column order 3,0,1,2).
        indices = np.ascontiguousarray(
            sparse_dict["indices"][:, [3, 0, 1, 2]]).astype(np.int32)
        features_dense = sparse_dict["features_dense"].astype(np.float32)
        filters = np.random.uniform(0, 1, size=[k, k, k, IC,
                                                OC]).astype(np.float32)
        # NOTE(review): the indices are cast to half like the features;
        # presumably the sparse network converts them back to integers --
        # confirm against SparseConv3dTestTorch.
        indices_t = torch.from_numpy(indices).int().to(device).half()
        features_t = torch.from_numpy(features).to(device).half()

        features_dense_t = torch.from_numpy(features_dense).to(device).half()
        net = SparseConv3dTestTorch(1, 3, shape, IC, OC, k, s, p,
                                    d).to(device).half()
        net_ref = Conv3dTestTorch(1, 3, shape, IC, OC, k, s, p,
                                  d).to(device).half()
        filters_t = torch.from_numpy(filters).to(device).half()
        # Write through `.data`, as the unit tests in this file do:
        # assigning into a slice of a grad-requiring leaf parameter is an
        # in-place operation on a view of that leaf, which autograd rejects.
        net_ref.net[0].weight.data[:] = filters_t.permute(4, 3, 0, 1,
                                                          2).contiguous()
        net.net[0].weight.data[:] = filters_t
        out_ref = net_ref(features_dense_t)
        times = []
        for _ in range(30):
            t = time.time()
            net(features_t, indices_t, bs)
            # Wait for the queued GPU work so the wall-clock delta covers
            # the whole forward pass.
            torch.cuda.synchronize()
            times.append(time.time() - t)
        # The first two iterations are excluded from the mean as warm-up.
        print("spconv time", np.mean(times[2:]))
        out = net(features_t, indices_t, bs).dense()
        print(
            np.linalg.norm(out.detach().cpu().numpy() -
                           out_ref.detach().cpu().numpy()))
예제 #4
0
    def testSpCpConv3d(self):
        """Compare ``SparseCoupleDeConvTest`` with ``SCNCoupleDeConvTest``.

        For each combination of device, shape, batch size, channel counts,
        kernel size and stride, the weights of the first two layers are
        copied from the sparse network into the reference network. Both run
        forward on the same features and are back-propagated with one
        shared upstream gradient. Input gradients, weight gradients and
        outputs must agree within ``atol=1e-4``.

        Padding and dilation are not part of the parameter grid; neither
        network constructor takes them here.
        """
        np.random.seed(484)
        devices = ["cuda:0", "cpu:0"]
        shapes = [[20, 20, 20]]
        batchsizes = [1, 2]

        in_channels = [64]
        out_channels = [32, 48, 64]
        ksizes = [2]
        strides = [2]

        for dev, shape, bs, IC, OC, k, s in params_grid(
                devices, shapes, batchsizes, in_channels, out_channels, ksizes,
                strides):
            device = torch.device(dev)
            num_points = [1000] * bs

            sparse_dict = generate_sparse_data(shape, num_points, IC)

            features = np.ascontiguousarray(sparse_dict["features"]).astype(
                np.float32)
            # Move the 4th index column to the front (column order 3,0,1,2).
            indices = np.ascontiguousarray(
                sparse_dict["indices"][:, [3, 0, 1, 2]]).astype(np.int32)
            # The drawn values are not used (the weights come from the
            # sparse network's own initialisation); the draw is kept so the
            # NumPy random stream that produces `dout` below is unchanged.
            np.random.uniform(0, 1, size=[k, k, k, IC, OC])
            indices_t = torch.from_numpy(indices).int().to(device)
            # The reference network takes the index columns rotated back,
            # i.e. with the leading column moved to the end.
            indices_scn_t = torch.from_numpy(
                indices[:, [1, 2, 3, 0]]).int().to(device)
            features_t = torch.from_numpy(features).to(device)
            features_t.requires_grad = True
            # Separate leaf tensor so each network accumulates its own
            # input gradient.
            features_ref_t = torch.from_numpy(features).to(device)
            features_ref_t.requires_grad = True

            net_ref = SCNCoupleDeConvTest(1, 3, shape, IC, OC, k, s).to(device)
            net = SparseCoupleDeConvTest(1, 3, shape, IC, OC, k,
                                         s).to(device)
            # Copy the sparse network's weights into the reference network,
            # reshaped to the reference layer's weight shape.
            for layer_idx in (0, 1):
                ref_weight = net_ref.net[layer_idx].weight
                ref_weight.data[:] = net.net[layer_idx].weight.data[:].view(
                    *ref_weight.shape)
            out_ref = net_ref(features_ref_t, indices_scn_t, bs)
            out = net(features_t, indices_t, bs)
            dout = np.random.uniform(-0.2, 0.2,
                                     out_ref.shape).astype(features.dtype)
            dout_t = torch.from_numpy(dout).to(device)
            out.backward(dout_t)
            out_ref.backward(dout_t)
            din = features_t.grad.detach()
            din_ref = features_ref_t.grad.detach()
            din_np = din.cpu().numpy()
            din_ref_np = din_ref.cpu().numpy()
            self.assertAllClose(din_ref_np, din_np, atol=1e-4)
            for layer, layer_ref in zip(net.net, net_ref.net):
                dw = layer.weight.grad.detach().cpu().numpy()
                # Reshape the reference gradient to the sparse layer's
                # weight shape before comparing.
                dw_ref = layer_ref.weight.grad.detach().cpu().view(
                    *dw.shape).numpy()
                self.assertAllClose(dw, dw_ref, atol=1e-4)

            out_np = out.detach().cpu().numpy()
            out_ref_np = out_ref.detach().cpu().numpy()
            self.assertAllClose(out_np, out_ref_np, atol=1e-4)
예제 #5
0
파일: test_conv.py 프로젝트: xmyqsh/spconv
def main_subm(algo, dtype=torch.float32):
    """Developer benchmark for the submanifold sparse 3D convolution.

    Builds ``SubMConv3dTestTorch`` (with the given ``algo``) and
    ``Conv3dTestTorch`` on ``cuda:0`` with one shared random kernel, times
    20 forward passes of the sparse network, prints the mean of the last
    ten timings, then prints min/max/mean/sum of the densified output.

    Args:
        algo: forwarded unchanged as the ``algo`` keyword of
            ``SubMConv3dTestTorch``.
        dtype: torch dtype that the inputs, kernel and both networks are
            converted to.

    Returns:
        The densified sparse output of the last grid combination as a
        NumPy array.
    """
    # function for develop.
    np.random.seed(484)
    torch.manual_seed(50051)
    devices = ["cuda:0"]
    shapes = [[400, 400, 15]]
    batchsizes = [2]

    in_channels = [32]
    out_channels = [64]
    ksizes = [(3, 3, 3)]
    strides = [1]
    paddings = [1]
    dilations = [1]
    for dev, shape, bs, IC, OC, k, s, p, d in params_grid(
            devices, shapes, batchsizes, in_channels, out_channels, ksizes,
            strides, paddings, dilations):
        if s > 1 and d > 1:
            continue
        device = torch.device(dev)
        num_points = [120000] * bs

        sparse_dict = generate_sparse_data(shape, num_points, IC)

        features = np.ascontiguousarray(sparse_dict["features"]).astype(
            np.float32)
        # Move the 4th index column to the front (column order 3,0,1,2).
        indices = np.ascontiguousarray(
            sparse_dict["indices"][:, [3, 0, 1, 2]]).astype(np.int32)
        features_dense = sparse_dict["features_dense"].astype(np.float32)
        # The kernel has size 1 along two of its three spatial axes.
        # NOTE(review): presumably it broadcasts against the layers' full
        # (3, 3, 3) weights on assignment below -- confirm the weight shapes.
        filters = np.random.uniform(0, 1, size=[k[0], 1, 1, IC,
                                                OC]).astype(np.float32)
        # NOTE(review): the indices are cast to `dtype` like the features;
        # presumably the sparse network converts them back to integers --
        # confirm against SubMConv3dTestTorch.
        indices_t = torch.from_numpy(indices).int().to(device).to(dtype)
        features_t = torch.from_numpy(features).to(device).to(dtype)

        features_dense_t = torch.from_numpy(features_dense).to(device).to(
            dtype)
        net = SubMConv3dTestTorch(1, 3, shape, IC, OC, k, s, p, d,
                                  algo=algo).to(device).to(dtype)
        net_ref = Conv3dTestTorch(1, 3, shape, IC, OC, k, s, p,
                                  d).to(device).to(dtype)
        filters_t = torch.from_numpy(filters).to(device).to(dtype)
        # Write through `.data`, as the unit tests in this file do:
        # assigning into a slice of a grad-requiring leaf parameter is an
        # in-place operation on a view of that leaf, which autograd rejects.
        net_ref.net[0].weight.data[:] = filters_t.permute(4, 3, 0, 1,
                                                          2).contiguous()
        net.net[0].weight.data[:] = filters_t
        # Only needed by the disabled norm comparison further down.
        out_ref = net_ref(features_dense_t)
        times = []
        for _ in range(20):
            t = time.time()
            net(features_t, indices_t, bs)
            # Wait for the queued GPU work so the wall-clock delta covers
            # the whole forward pass.
            torch.cuda.synchronize()
            times.append(time.time() - t)
        # The first ten iterations are excluded from the mean as warm-up.
        print("spconv time", np.mean(times[10:]))
        out = net(features_t, indices_t, bs)
        out = out.dense()
        out_numpy = out.detach().cpu().numpy()
        # Disabled comparison against the dense reference:
        # print(
        #     np.linalg.norm(out.detach().cpu().numpy() -
        #                    out_ref.detach().cpu().numpy()))
        print(out_numpy.min(), out_numpy.max(), out_numpy.mean(),
              out_numpy.sum())
    return out_numpy
예제 #6
0
파일: test_conv.py 프로젝트: xmyqsh/spconv
    def testSpConv3d(self):
        """Compare the sparse 3D convolution with the dense reference net.

        For each combination from ``params_grid`` both
        ``SparseConv3dTestTorch`` and ``Conv3dTestTorch`` receive the same
        random kernel (laid out according to the sparse network's algorithm
        and ``FILTER_HWIO``), run forward on the sparse/dense views of the
        same data, and are back-propagated with one shared upstream
        gradient. Outputs, weight gradients and input gradients must agree
        within ``atol=1e-4``.
        """
        np.random.seed(484)
        torch.manual_seed(48848)
        devices = ["cuda:0"]
        shapes = [[19, 18, 17]]
        batchsizes = [1, 2]

        in_channels = [32]
        out_channels = [32, 48, 64]
        ksizes = [2, 3]
        strides = [1, 2, 3]
        paddings = [0, 1, 2]
        dilations = [1, 2, 3]
        # Only MaskSplitImplicitGemm is swept. To widen coverage, add
        # ConvAlgo.Native and/or ConvAlgo.MaskImplicitGemm to this list.
        algos = [ConvAlgo.MaskSplitImplicitGemm]

        for dev, shape, bs, IC, OC, k, s, p, d, al in params_grid(
                devices, shapes, batchsizes, in_channels, out_channels, ksizes,
                strides, paddings, dilations, algos):
            if s > 1 and d > 1:
                continue  # don't support this.
            print(k, s, p, d)
            device = torch.device(dev)
            num_points = [1000] * bs
            dtype = torch.float32
            net = SparseConv3dTestTorch(1,
                                        3,
                                        shape,
                                        IC,
                                        OC,
                                        k,
                                        s,
                                        p,
                                        d,
                                        algo=al).to(device).to(dtype)
            net_ref = Conv3dTestTorch(1, 3, shape, IC, OC, k, s, p,
                                      d).to(device).to(dtype)

            sparse_dict = generate_sparse_data(shape, num_points, IC)

            features = np.ascontiguousarray(sparse_dict["features"]).astype(
                np.float32)
            # Move the 4th index column to the front (column order 3,0,1,2).
            indices = np.ascontiguousarray(
                sparse_dict["indices"][:, [3, 0, 1, 2]]).astype(np.int32)
            features_dense = sparse_dict["features_dense"].astype(np.float32)
            indices_t = torch.from_numpy(indices).int().to(device)
            features_t = torch.from_numpy(features).to(device).to(dtype)
            features_t.requires_grad = True
            features_dense_t = torch.from_numpy(features_dense).to(device).to(
                dtype)
            features_dense_t.requires_grad = True

            # Choose the sparse kernel layout once, together with the axis
            # order that converts it to the reference layer's layout. The
            # same axis order is reused for the weight gradients below.
            if net.algo == ConvAlgo.Native:
                if FILTER_HWIO:
                    filter_shape = [k, k, k, IC, OC]
                    to_ref_axes = (4, 3, 0, 1, 2)
                else:
                    filter_shape = [k, k, k, OC, IC]
                    to_ref_axes = (3, 4, 0, 1, 2)
            else:
                # OHWI -> OIHW
                filter_shape = [OC, k, k, k, IC]
                to_ref_axes = (0, 4, 1, 2, 3)
            filters = np.random.uniform(-1, 1,
                                        size=filter_shape).astype(np.float32)
            filters_t = torch.from_numpy(filters).to(device).to(dtype)
            net_ref.net[0].weight.data[:] = filters_t.permute(
                *to_ref_axes).contiguous()
            net.net[0].weight.data[:] = filters_t

            out_ref = net_ref(features_dense_t)
            out = net(features_t, indices_t, bs).dense()
            out_np = out.detach().cpu().numpy()
            out_ref_np = out_ref.detach().cpu().numpy()
            self.assertAllClose(out_np, out_ref_np, atol=1e-4)

            dout = np.random.uniform(-0.2, 0.2,
                                     out_ref.shape).astype(features.dtype)
            dout_t = torch.from_numpy(dout).to(device)
            out.backward(dout_t)
            out_ref.backward(dout_t)
            # Read the dense input gradient back at the sparse coordinates.
            din_dense = features_dense_t.grad.detach().permute(0, 2, 3, 4,
                                                               1).contiguous()
            din_sparse = gather_nd(din_dense, indices_t.long())
            din = features_t.grad.detach()

            din_np = din.cpu().numpy()
            din_sparse_np = din_sparse.cpu().numpy()
            for layer, layer_ref in zip(net.net, net_ref.net):
                dw = layer.weight.grad.detach().cpu().numpy()
                dw_ref = layer_ref.weight.grad.detach().cpu().numpy()
                # Bring the sparse weight gradient into the reference layout.
                dw = dw.transpose(*to_ref_axes)

                self.assertAllClose(dw, dw_ref, atol=1e-4)
            self.assertAllClose(din_np, din_sparse_np, atol=1e-4)