Example #1
    def test_no_overwrite(self, dtype, order, get_mat, expected_lower,
                          expected_upper, device):
        omat = get_mat(order=order, dtype=dtype)
        mat = get_mat(order=order, dtype=dtype, device=device)

        # For CUDA inputs we must add the size of the input matrix to the
        # available GPU memory, since overwrite=False forces a full copy.
        mgpu_slack = 0
        if device.startswith("cuda"):
            mgpu_slack = (self.basic_opt.max_gpu_mem
                          + mat.shape[0]**2 * sizeof_dtype(mat.dtype))

        with memory_checker(self.basic_opt, extra_mem=mgpu_slack) as new_opt:
            act_up = gpu_lauum(mat, upper=True, overwrite=False, opt=new_opt)
            torch.cuda.synchronize()
        np.testing.assert_allclose(expected_upper,
                                   act_up.cpu().numpy(),
                                   rtol=self.rtol[dtype])
        np.testing.assert_allclose(omat, mat.cpu())

        with memory_checker(self.basic_opt, extra_mem=mgpu_slack) as new_opt:
            act_lo = gpu_lauum(mat, upper=False, overwrite=False, opt=new_opt)
            torch.cuda.synchronize()
        np.testing.assert_allclose(expected_lower,
                                   act_lo.cpu().numpy(),
                                   rtol=self.rtol[dtype])
        np.testing.assert_allclose(omat, mat.cpu())
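
Every example wraps the call under test in a memory_checker context manager, which yields a modified options object and verifies on exit that the computation stayed within the configured memory budget plus any declared extra_mem slack (here the mat.shape[0]**2 * sizeof_dtype(mat.dtype) bytes needed for the unavoidable input copy when overwrite=False on CUDA). A minimal sketch of such a context manager, assuming the options object is a dataclass with a max_gpu_mem field; the library's real implementation may differ:

import contextlib
import dataclasses

import torch


@contextlib.contextmanager
def memory_checker(opt, extra_mem=0):
    # Budget = configured GPU limit + explicitly declared slack.
    budget = opt.max_gpu_mem + extra_mem
    new_opt = dataclasses.replace(opt, max_gpu_mem=budget)
    if torch.cuda.is_available():
        torch.cuda.reset_peak_memory_stats()
    yield new_opt
    if torch.cuda.is_available():
        peak = torch.cuda.max_memory_allocated()
        assert peak <= budget, \
            f"Peak GPU memory {peak} B exceeded the allowed budget {budget} B"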
Example #2
    def test_write_opposite(self, dtype, order, get_mat, expected_lower,
                            expected_upper):
        omat = get_mat(order=order, dtype=dtype).numpy()
        mat = torch.from_numpy(omat.copy(order="K"))
        with memory_checker(self.basic_opt) as new_opt:
            act_up = gpu_lauum(mat,
                               upper=True,
                               overwrite=True,
                               write_opposite=True,
                               opt=new_opt)
        np.testing.assert_allclose(np.triu(omat, k=1),
                                   np.triu(act_up.numpy(), k=1),
                                   rtol=self.rtol[dtype])
        np.testing.assert_allclose(np.tril(act_up.numpy()),
                                   np.triu(expected_upper).T,
                                   rtol=self.rtol[dtype])

        mat = torch.from_numpy(omat.copy(order="K"))
        with memory_checker(self.basic_opt) as new_opt:
            act_lo = gpu_lauum(mat,
                               upper=False,
                               overwrite=True,
                               write_opposite=True,
                               opt=new_opt)
        np.testing.assert_allclose(np.tril(omat, k=-1),
                                   np.tril(act_lo.numpy(), k=-1),
                                   rtol=self.rtol[dtype])
        np.testing.assert_allclose(np.triu(act_lo.numpy()),
                                   np.tril(expected_lower).T,
                                   rtol=self.rtol[dtype])
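
The assertions above pin down the meaning of write_opposite=True: with upper=True the strictly-upper triangle of the input is preserved, while the upper-triangular result is written, transposed, into the lower triangle (diagonal included), and symmetrically for upper=False. An illustrative helper (not part of the test-suite) that builds the full expected output from those two pieces:

import numpy as np


def expected_write_opposite_upper(omat, expected_upper):
    # The strict upper triangle keeps the original entries ...
    out = np.triu(omat, k=1)
    # ... while the upper-triangular result lands, transposed, in the lower part.
    return out + np.triu(expected_upper).T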
Example #3
    def test_overwrite(self, dtype, order, get_mat, expected_lower, expected_upper):
        mat = get_mat(order=order, dtype=dtype).numpy().copy(order="K")
        with memory_checker(self.basic_opt) as new_opt:
            act_up = gpu_lauum(torch.from_numpy(mat), upper=True, overwrite=True, opt=new_opt)
        np.testing.assert_allclose(expected_upper, act_up.numpy(), rtol=self.rtol[dtype])

        mat = get_mat(order=order, dtype=dtype).numpy().copy(order="K")
        with memory_checker(self.basic_opt) as new_opt:
            act_lo = gpu_lauum(torch.from_numpy(mat), upper=False, overwrite=True, opt=new_opt)
        np.testing.assert_allclose(expected_lower, act_lo.numpy(), rtol=self.rtol[dtype])
Example #4
def run_potrf_test(np_data, dtype, order, opt, start_cuda, upper, clean,
                   overwrite):
    # Convert np_data to the appropriate dtype and memory order
    data = np.copy(np_data, order=order).astype(dtype, copy=False)
    lapack_fn, rtol = choose_on_dtype(dtype)
    A = torch.from_numpy(data.copy(order="K"))
    if start_cuda:
        A = A.cuda()

    orig_stride = A.stride()
    orig_ptr = A.data_ptr()

    with memory_checker(opt) as new_opt:
        C_gpu = gpu_cholesky(A,
                             upper=upper,
                             clean=clean,
                             overwrite=overwrite,
                             opt=new_opt)

    assert orig_stride == C_gpu.stride(), "gpu_potrf modified matrix stride."
    if overwrite:
        assert orig_ptr == C_gpu.data_ptr(), \
            "Data-pointer changed although overwrite is True."

    C_cpu = lapack_fn(data,
                      lower=int(not upper),
                      clean=int(clean),
                      overwrite_a=int(overwrite))[0]
    np.testing.assert_allclose(C_cpu,
                               C_gpu.cpu().numpy(),
                               rtol=rtol,
                               verbose=True)
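
choose_on_dtype is expected to return a reference LAPACK Cholesky routine plus a dtype-dependent tolerance; the call lapack_fn(data, lower=..., clean=..., overwrite_a=...) matches SciPy's potrf wrappers. A plausible sketch, with the tolerance values being assumptions rather than the suite's actual settings:

import numpy as np
from scipy.linalg import lapack


def choose_on_dtype(dtype):
    # Pick the SciPy LAPACK potrf wrapper matching the dtype, together with a
    # relative tolerance for comparing the CPU reference against the GPU result.
    if dtype == np.float64:
        return lapack.dpotrf, 1e-12
    return lapack.spotrf, 1e-5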
Example #5
def _run_test(fn, exp, tensors, out, rtol, opt):
    with memory_checker(opt) as new_opt:
        actual = fn(*tensors, out=out, opt=new_opt)

    # Check 1. Accuracy
    np.testing.assert_allclose(exp, actual, rtol=rtol)
    # Check 2. Output pointers
    if out is not None:
        assert out.data_ptr() == actual.data_ptr(), "Output data tensor was not used"
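
A hypothetical invocation of _run_test (the wrapped operation, tolerance, and basic_opt below are illustrative, not taken from the test-suite): any callable following the fn(*tensors, out=out, opt=opt) convention can be checked this way.

import torch


def _matmul_fn(a, b, out=None, opt=None):
    # Toy stand-in following the fn(*tensors, out=out, opt=opt) convention.
    return torch.matmul(a, b, out=out)


a = torch.randn(8, 4, dtype=torch.float64)
b = torch.randn(4, 6, dtype=torch.float64)
out = torch.empty(8, 6, dtype=torch.float64)
_run_test(_matmul_fn, (a @ b).numpy(), (a, b), out=out, rtol=1e-12, opt=basic_opt)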
Example #6
    def test_write_opposite(self, dtype, order, get_mat, expected_lower, expected_upper, device):
        omat = get_mat(order=order, dtype=dtype)
        mat = get_mat(order=order, dtype=dtype, device=device)

        mgpu_slack = mat.shape[0]**2 * sizeof_dtype(mat.dtype)
        with memory_checker(self.basic_opt, extra_mem=mgpu_slack) as new_opt:
            act_up = gpu_lauum(mat, upper=True, overwrite=False, write_opposite=True, opt=new_opt)
        act_up = act_up.cpu()
        np.testing.assert_allclose(np.triu(omat, k=1), np.triu(act_up.numpy(), k=1),
                                   rtol=self.rtol[dtype])
        np.testing.assert_allclose(np.tril(act_up.numpy()), np.triu(expected_upper).T,
                                   rtol=self.rtol[dtype])

        mat = get_mat(order=order, dtype=dtype, device=device)
        with memory_checker(self.basic_opt) as new_opt:
            act_lo = gpu_lauum(mat, upper=False, overwrite=True, write_opposite=True, opt=new_opt)
            torch.cuda.synchronize()
        act_lo = act_lo.cpu()
        np.testing.assert_allclose(np.tril(omat, k=-1), np.tril(act_lo.numpy(), k=-1),
                                   rtol=self.rtol[dtype])
        np.testing.assert_allclose(np.triu(act_lo.numpy()), np.tril(expected_lower).T,
                                   rtol=self.rtol[dtype])
Example #7
def _run_fmmv_test(fn, exp, tensors, out, rtol, opt):
    # TODO: On some systems (nest but not sperone), checking memory usage for
    # CPU functions fails due to inconsistent memory numbers being reported at
    # random. We simply raise the CPU memory limit by a large amount so the
    # check is effectively disabled.
    extra_mem = 10 * 2**30 if opt.use_cpu else 0
    opt = dataclasses.replace(opt, max_cpu_mem=opt.max_cpu_mem + extra_mem)
    with memory_checker(opt) as new_opt:
        actual = fn(*tensors, out=out, opt=new_opt)

    # Check 1. Accuracy
    np.testing.assert_allclose(exp, actual.cpu(), rtol=rtol)
    # Check 2. Output pointers
    if out is not None:
        assert out.data_ptr() == actual.data_ptr(), "Output data tensor was not used"
Example #8
def _run_fmm_test(k_class, k_exp, A, B, out, dtype, rtol, opt):
    if isinstance(A, np.ndarray):
        A = torch.from_numpy(A.astype(dtype, copy=False))
    if isinstance(B, np.ndarray):
        B = torch.from_numpy(B.astype(dtype, copy=False))
    if out is not None and isinstance(out, np.ndarray):
        out = torch.from_numpy(out.astype(dtype, copy=False))

    with memory_checker(opt) as new_opt:
        actual = k_class(A, B, out=out, opt=new_opt)

    np.testing.assert_allclose(k_exp, actual, rtol=rtol)
    if out is not None:
        # Check output pointers
        assert out.data_ptr() == actual.data_ptr(), \
            "Output data tensor was not used"