def test_cuda_start(self, mat, kernel, gram, dtype, order):
    opt = dataclasses.replace(self.basic_opt, use_cpu=False, cpu_preconditioner=False)
    rtol = self.rtol[dtype]

    mat = fix_mat(mat, dtype=dtype, order=order, copy=True)
    gpu_mat = move_tensor(mat, "cuda:0")
    gram = fix_mat(gram, dtype=dtype, order=order, copy=True)
    gpu_gram = move_tensor(gram, "cuda:0")

    la = 1
    prec = FalkonPreconditioner(la, kernel, opt)
    prec.init(mat)
    gpu_prec = FalkonPreconditioner(la, kernel, opt)
    gpu_prec.init(gpu_mat)

    np.testing.assert_allclose(prec.dT.numpy(), gpu_prec.dT.cpu().numpy(), rtol=rtol)
    np.testing.assert_allclose(prec.dA.numpy(), gpu_prec.dA.cpu().numpy(), rtol=rtol)
    np.testing.assert_allclose(prec.fC.numpy(), gpu_prec.fC.cpu().numpy(), rtol=rtol * 10)
    assert gpu_prec.fC.device == gpu_mat.device, "Device changed unexpectedly"

    assert_invariant_on_TT(gpu_prec, gpu_gram, tol=rtol)
    assert_invariant_on_AT(prec, gram, la, tol=rtol)
    assert_invariant_on_T(prec, gram, tol=rtol * 10)
    assert_invariant_on_prec(prec, N, gram, la, tol=rtol * 10)
def test_fmmv_input_device(self, A, B, v, Ao, Adt, Bo, Bdt, vo, vdt, kernel, expected_fmmv):
    input_device = "cuda:0"
    A = fix_mat(A, order=Ao, dtype=Adt, device=input_device)
    B = fix_mat(B, order=Bo, dtype=Bdt, device=input_device)
    v = fix_mat(v, order=vo, dtype=vdt, device=input_device)

    opt = dataclasses.replace(self.basic_options, use_cpu=False)
    rtol = choose_on_dtype(A.dtype)

    # Test normal
    _run_fmmv_test(kernel.mmv, expected_fmmv, (A, B, v), out=None, rtol=rtol, opt=opt)
    # Test with out
    out = torch.empty(A.shape[0], v.shape[1], dtype=A.dtype, device=input_device)
    _run_fmmv_test(kernel.mmv, expected_fmmv, (A, B, v), out=out, rtol=rtol, opt=opt)
def test_gpu_inputs_fail(self, A, B, v, kernel, expected_fmmv):
    # Inputs on mismatched devices (A, B on GPU, v on CPU) must raise.
    A = fix_mat(A, order="C", dtype=n32, device="cuda:0")
    B = fix_mat(B, order="C", dtype=n32, device="cuda:0")
    v = fix_mat(v, order="C", dtype=n32, device="cpu")
    opt = dataclasses.replace(self.basic_options, use_cpu=False, max_gpu_mem=np.inf)
    rtol = choose_on_dtype(A.dtype)

    with pytest.raises(RuntimeError):
        _run_fmmv_test(kernel.mmv, expected_fmmv, (A, B, v), out=None, rtol=rtol, opt=opt)
def test_gpu_inputs(self, A, B, v, kernel, expected_fmmv):
    A = fix_mat(A, order="C", dtype=n32).cuda()
    B = fix_mat(B, order="C", dtype=n32, device=A.device)
    v = fix_mat(v, order="C", dtype=n32, device=A.device)
    opt = dataclasses.replace(self.basic_options, use_cpu=False, max_gpu_mem=np.inf)
    rtol = choose_on_dtype(A.dtype)

    # Test normal
    _run_fmmv_test(kernel.mmv, expected_fmmv, (A, B, v), out=None, rtol=rtol, opt=opt)
    # Test with out
    out = torch.empty(A.shape[0], v.shape[1], dtype=A.dtype, device=A.device)
    _run_fmmv_test(kernel.mmv, expected_fmmv, (A, B, v), out=out, rtol=rtol, opt=opt)
def test_dfmmv(self, s_A, s_B, v, w, Adt, Bdt, vo, vdt, wo, wdt, kernel, s_e_dfmmv, cpu):
    A = fix_sparse_mat(s_A[0], dtype=Adt)
    B = fix_sparse_mat(s_B[0], dtype=Bdt)
    v = fix_mat(v, order=vo, dtype=vdt)
    w = fix_mat(w, order=wo, dtype=wdt)

    opt = dataclasses.replace(self.basic_options, use_cpu=cpu)
    rtol = choose_on_dtype(A.dtype)

    # Test normal
    _run_fmmv_test(kernel.dmmv, s_e_dfmmv, (A, B, v, w), out=None, rtol=rtol, opt=opt)
    # Test with out
    out = torch.empty(m, t, dtype=A.dtype)
    _run_fmmv_test(kernel.dmmv, s_e_dfmmv, (A, B, v, w), out=out, rtol=rtol, opt=opt)
def test_trsm_wrapper(mat, arr, dtype, order, device, lower, transpose):
    rtol = 1e-2 if dtype == np.float32 else 1e-11

    n_mat = move_tensor(fix_mat(mat, dtype=dtype, order=order, copy=True), device=device)
    n_arr = move_tensor(fix_mat(arr, dtype=dtype, order=order, copy=True), device=device)
    expected = sclb.dtrsm(1e-2, mat, arr, side=0, lower=lower, trans_a=transpose, overwrite_b=0)

    if device.startswith("cuda") and order == "C":
        # C-ordered (row-major) inputs are not supported on CUDA.
        with pytest.raises(ValueError):
            actual = trsm(n_arr, n_mat, alpha=1e-2, lower=lower, transpose=transpose)
    else:
        actual = trsm(n_arr, n_mat, alpha=1e-2, lower=lower, transpose=transpose)
        np.testing.assert_allclose(expected, actual.cpu().numpy(), rtol=rtol)
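# The test above treats `scipy.linalg.blas.dtrsm` as ground truth for the
# `trsm` wrapper. As a reading aid, here is a minimal SciPy reference of the
# same semantics: solve op(A) @ X = alpha * B with A triangular. This is our
# own sketch; the name `_trsm_reference` is not part of the library.
def _trsm_reference(arr, mat, alpha=1.0, lower=0, transpose=0):
    from scipy.linalg import solve_triangular

    # `lower` selects which triangle of `mat` holds the factor;
    # `transpose` solves with op(A) = A.T instead of op(A) = A.
    return solve_triangular(mat, alpha * arr, lower=bool(lower), trans=int(transpose))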
def test_zero_lambda(self, mat, kernel, gram, cpu):
    opt = dataclasses.replace(self.basic_opt, use_cpu=cpu, cpu_preconditioner=cpu)
    mat = fix_mat(mat, dtype=np.float64, order="K", copy=True)
    gram = fix_mat(gram, dtype=np.float64, order="K", copy=True)

    la = 0
    prec = FalkonPreconditioner(la, kernel, opt)
    prec.init(mat)

    assert_invariant_on_TT(prec, gram, tol=1e-10)
    assert_invariant_on_AT(prec, gram, la, tol=1e-10)
    assert_invariant_on_T(prec, gram, tol=1e-9)
    assert_invariant_on_prec(prec, N, gram, la, tol=1e-8)
def test_mul(self, mat, upper, preserve_diag, order, device):
    inpt1 = fix_mat(mat, dtype=mat.dtype, order=order, copy=True, numpy=True)

    k = 1 if preserve_diag else 0
    if upper:
        tri_fn = partial(np.triu, k=k)
        other_tri_fn = partial(np.tril, k=k - 1)
    else:
        tri_fn = partial(np.tril, k=-k)
        other_tri_fn = partial(np.triu, k=-k + 1)

    inpt1 = torch.from_numpy(inpt1)
    inpt1_dev = create_same_stride(inpt1.shape, inpt1, inpt1.dtype, device)
    inpt1_dev.copy_(inpt1)

    mul_triang(inpt1_dev, upper=upper, preserve_diag=preserve_diag, multiplier=10**6)
    inpt1 = inpt1_dev.cpu().numpy()

    assert np.mean(tri_fn(inpt1)) > 10**5
    assert np.mean(other_tri_fn(inpt1)) < 1
def test_zero(self, mat, upper, preserve_diag, order, device):
    inpt1 = fix_mat(mat, dtype=mat.dtype, order=order, copy=True, numpy=True)
    inpt2 = inpt1.copy(order="K")

    k = 1 if preserve_diag else 0
    if upper:
        tri_fn = partial(np.triu, k=k)
    else:
        tri_fn = partial(np.tril, k=-k)

    inpt1 = torch.from_numpy(inpt1)
    inpt1_dev = create_same_stride(inpt1.shape, inpt1, inpt1.dtype, device)
    inpt1_dev.copy_(inpt1)

    mul_triang(inpt1_dev, upper=upper, preserve_diag=preserve_diag, multiplier=0)
    inpt1 = inpt1_dev.cpu().numpy()
    assert np.sum(tri_fn(inpt1)) == 0

    if preserve_diag:
        inpt2_dev = inpt1_dev
        inpt2_dev.copy_(torch.from_numpy(inpt2))
        zero_triang(inpt2_dev, upper=upper)
        inpt2 = inpt2_dev.cpu().numpy()
        np.testing.assert_allclose(inpt1, inpt2)
def test_up(self, mat, order, dtype, device):
    mat = fix_mat(mat, order=order, dtype=dtype, numpy=True)
    mat_up = mat.copy(order="K")
    # Lower triangle of mat_up is 0
    mat_up[np.tril_indices(self.t, -1)] = 0
    # Create device matrix
    mat_up = torch.from_numpy(mat_up)
    mat_up_dev = move_tensor(mat_up, device)

    copy_triang(mat_up_dev, upper=True)
    mat_up = mat_up_dev.cpu().numpy()

    assert np.sum(mat_up == 0) == 0
    np.testing.assert_array_equal(np.triu(mat), np.triu(mat_up))
    np.testing.assert_array_equal(np.tril(mat_up), np.triu(mat_up).T)
    np.testing.assert_array_equal(np.diag(mat), np.diag(mat_up))

    # Reset and try with `upper=False`
    mat_up[np.tril_indices(self.t, -1)] = 0
    mat_up_dev.copy_(torch.from_numpy(mat_up))
    copy_triang(mat_up_dev, upper=False)  # Only the diagonal will be set.
    mat_up = mat_up_dev.cpu().numpy()
    np.testing.assert_array_equal(np.diag(mat), np.diag(mat_up))
def test_low(self, mat, order, dtype, device):
    mat = fix_mat(mat, order=order, dtype=dtype, numpy=True)
    mat_low = mat.copy(order="K")
    # Upper triangle of mat_low is 0
    mat_low[np.triu_indices(self.t, 1)] = 0
    # Create device matrix
    mat_low = torch.from_numpy(mat_low)
    mat_low_dev = move_tensor(mat_low, device)

    # Run copy
    copy_triang(mat_low_dev, upper=False)

    # Make checks on CPU
    mat_low = mat_low_dev.cpu().numpy()
    assert np.sum(mat_low == 0) == 0
    np.testing.assert_array_equal(np.tril(mat), np.tril(mat_low))
    np.testing.assert_array_equal(np.triu(mat_low), np.tril(mat_low).T)
    np.testing.assert_array_equal(np.diag(mat), np.diag(mat_low))

    # Reset and try with `upper=True`
    mat_low[np.triu_indices(self.t, 1)] = 0
    mat_low_dev.copy_(torch.from_numpy(mat_low))
    copy_triang(mat_low_dev, upper=True)  # Only the diagonal will be set
    mat_low = mat_low_dev.cpu().numpy()
    np.testing.assert_array_equal(np.diag(mat), np.diag(mat_low))
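# The assertions in `test_up`/`test_low` pin down what `copy_triang` does: it
# mirrors one triangle of a square matrix onto the other, leaving the source
# triangle and the diagonal untouched. A minimal NumPy reference of that
# contract (our own sketch, not the library implementation):
def _copy_triang_reference(x, upper):
    if upper:
        # Keep the upper triangle and diagonal; mirror the strict upper part down.
        return np.triu(x) + np.triu(x, 1).T
    return np.tril(x) + np.tril(x, -1).T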
def test_simple(self, mat, kernel, gram, cpu, dtype, order):
    opt = dataclasses.replace(self.basic_opt, use_cpu=cpu, cpu_preconditioner=cpu)
    rtol = self.rtol[dtype]

    mat = fix_mat(mat, dtype=dtype, order=order, copy=True)
    gram = fix_mat(gram, dtype=dtype, order=order, copy=True)

    la = 100
    prec = FalkonPreconditioner(la, kernel, opt)
    prec.init(mat)

    assert_invariant_on_TT(prec, gram, tol=rtol)
    assert_invariant_on_AT(prec, gram, la, tol=rtol)
    assert_invariant_on_T(prec, gram, tol=rtol * 10)
    assert_invariant_on_prec(prec, N, gram, la, tol=rtol * 10)
def test_trsm(self, mat, vec, solution, alpha, dtype, order_v, order_A, device):
    mat = move_tensor(fix_mat(mat, dtype, order_A, copy=True, numpy=False), device=device)
    vec = move_tensor(fix_mat(vec, dtype, order_v, copy=True, numpy=False), device=device)
    sol_vec, lower, trans = solution

    out = trsm(vec, mat, alpha, lower=int(lower), transpose=int(trans))

    assert out.data_ptr() != vec.data_ptr(), "Vec was overwritten."
    assert out.device == vec.device, "Output device is incorrect."
    assert out.stride() == vec.stride(), "Stride was modified."
    assert out.dtype == vec.dtype, "Dtype was modified."
    np.testing.assert_allclose(sol_vec, out.cpu().numpy(), rtol=self.rtol[dtype])
def test_all_combos(self, mat, vec, order, device, upper, side):
    exp_output = self.exp_vec_mul_triang(mat, vec, upper, side)

    vec = fix_mat(vec, order=order, dtype=np.float64, numpy=False, device=device)
    mat2 = fix_mat(mat, order=order, dtype=np.float64, numpy=False, device=device, copy=True)
    out = vec_mul_triang(mat2, upper=upper, side=side, multipliers=vec).cpu().numpy()
    np.testing.assert_allclose(exp_output.numpy(), out)
    assert out.flags["%s_CONTIGUOUS" % order] is True, "Output is not %s-contiguous" % order

    # Test with different vec orderings
    vec = vec.reshape(1, -1)
    mat2 = fix_mat(mat, order=order, dtype=np.float64, numpy=False, device=device, copy=True)
    out = vec_mul_triang(mat2, upper=upper, side=side, multipliers=vec).cpu().numpy()
    np.testing.assert_allclose(exp_output.numpy(), out, err_msg="Vec row ordering failed")

    vec = vec.reshape(-1)
    mat2 = fix_mat(mat, order=order, dtype=np.float64, numpy=False, device=device, copy=True)
    out = vec_mul_triang(mat2, upper=upper, side=side, multipliers=vec).cpu().numpy()
    np.testing.assert_allclose(exp_output.numpy(), out, err_msg="Vec 1D ordering failed")
def test_fmmv(self, s_A, s_B, v, Adt, Bdt, vo, vdt, kernel, s_expected_fmmv, cpu):
    A = fix_sparse_mat(s_A[0], dtype=Adt)
    B = fix_sparse_mat(s_B[0], dtype=Bdt)
    v = fix_mat(v, dtype=vdt, order=vo, copy=True)

    opt = dataclasses.replace(self.basic_options, use_cpu=cpu)
    rtol = choose_on_dtype(A.dtype)

    # Test normal
    _run_fmmv_test(kernel.mmv, s_expected_fmmv, (A, B, v), out=None, rtol=rtol, opt=opt)
    # Test with out
    out = torch.empty(A.shape[0], v.shape[1], dtype=A.dtype)
    _run_fmmv_test(kernel.mmv, s_expected_fmmv, (A, B, v), out=out, rtol=rtol, opt=opt)
def test_rect(self, rect, order, dtype):
    from falkon.la_helpers.cuda_la_helpers import cuda_transpose

    mat = fix_mat(rect, order=order, dtype=dtype, copy=True, numpy=True)
    exp_mat_out = np.copy(mat.T, order=order)

    mat = move_tensor(torch.from_numpy(mat), "cuda:0")
    mat_out = move_tensor(torch.from_numpy(exp_mat_out), "cuda:0")
    mat_out.fill_(0.0)

    cuda_transpose(input=mat, output=mat_out)

    mat_out = move_tensor(mat_out, "cpu").numpy()
    assert mat_out.strides == exp_mat_out.strides
    np.testing.assert_allclose(exp_mat_out, mat_out)
def test_up(self, mat, order, dtype):
    mat = fix_mat(mat, order=order, dtype=dtype, numpy=True)
    mat_up = mat.copy(order="K")
    # Lower triangle of mat_up is 0
    mat_up[np.tril_indices(self.t, -1)] = 0

    copy_triang(mat_up, upper=True)

    assert np.sum(mat_up == 0) == 0
    np.testing.assert_array_equal(np.triu(mat), np.triu(mat_up))
    np.testing.assert_array_equal(np.tril(mat_up), np.triu(mat_up).T)
    np.testing.assert_array_equal(np.diag(mat), np.diag(mat_up))

    # Reset and try with `upper=False`
    mat_up[np.tril_indices(self.t, -1)] = 0
    copy_triang(mat_up, upper=False)  # Only the diagonal will be set.
    np.testing.assert_array_equal(np.diag(mat), np.diag(mat_up))
def test_lower(self, mat, exp_lower, clean, overwrite, order, dtype):
    mat = fix_mat(mat, order=order, dtype=dtype, copy=False, numpy=True)
    inpt = mat.copy(order="K")

    our_chol = potrf(inpt, upper=False, clean=clean, overwrite=overwrite)
    if overwrite:
        assert inpt.ctypes.data == our_chol.ctypes.data, "Overwriting failed"

    if clean:
        np.testing.assert_allclose(exp_lower, our_chol, rtol=self.rtol[dtype])
        assert np.triu(our_chol, 1).sum() == 0
    else:
        np.testing.assert_allclose(exp_lower, np.tril(our_chol), rtol=self.rtol[dtype])
        np.testing.assert_allclose(np.triu(mat, 1), np.triu(our_chol, 1))
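# For reference, the contract checked above, written against NumPy's Cholesky
# (a sketch under our reading of the test; `_potrf_reference` is hypothetical):
# with `clean=True` the unused triangle is zeroed, otherwise it retains the
# input's entries; `overwrite=True` additionally reuses the input buffer.
def _potrf_reference(a, upper=False, clean=True):
    factor = np.linalg.cholesky(a)  # lower-triangular Cholesky factor
    if upper:
        factor = factor.T
    if clean:
        return factor
    # Keep the original entries in the opposite (strict) triangle.
    return factor + (np.tril(a, -1) if upper else np.triu(a, 1))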
def test_cpu_gpu_equality(mat, kernel, gram):
    la = 12.3
    mat = fix_mat(mat, dtype=np.float64, order="F", copy=True)

    opt = FalkonOptions(compute_arch_speed=False, use_cpu=False, cpu_preconditioner=False)
    prec_gpu = FalkonPreconditioner(la, kernel, opt)
    prec_gpu.init(mat)

    opt = dataclasses.replace(opt, use_cpu=True, cpu_preconditioner=True)
    prec_cpu = FalkonPreconditioner(la, kernel, opt)
    prec_cpu.init(mat)

    np.testing.assert_allclose(prec_cpu.fC, prec_gpu.fC, rtol=1e-10, atol=1e-10)
    np.testing.assert_allclose(prec_cpu.dA, prec_gpu.dA, rtol=1e-10)
    np.testing.assert_allclose(prec_cpu.dT, prec_gpu.dT, rtol=1e-10)
def test_upper(self, mat, vec, order):
    mat = fix_mat(mat, order=order, dtype=mat.dtype, numpy=True, copy=True)

    out = vec_mul_triang(mat.copy(order="K"), upper=True, side=0, multipliers=vec)
    exp = np.array([[0, 0, 0], [2, 2, 4], [6, 6, 4]], dtype=np.float32)
    np.testing.assert_allclose(exp, out)
    assert out.flags["%s_CONTIGUOUS" % order] is True, "Output is not %s-contiguous" % order

    out = vec_mul_triang(mat.copy(order="K"), upper=True, side=1, multipliers=vec)
    exp = np.array([[0, 1, 0.5], [2, 2, 2], [6, 6, 4]], dtype=np.float32)
    np.testing.assert_allclose(exp, out)
    assert out.flags["%s_CONTIGUOUS" % order] is True, "Output is not %s-contiguous" % order
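# The expected matrices above fix the semantics of `vec_mul_triang`: entry
# (i, j) of the chosen triangle (diagonal included) is scaled by
# multipliers[i] when side=0 (row-wise) or by multipliers[j] when side=1
# (column-wise); the other triangle is untouched. A NumPy reference of that
# behaviour (our own sketch, not the library's implementation):
def _vec_mul_triang_reference(m, v, upper, side):
    ones = np.ones_like(m, dtype=bool)
    mask = np.triu(ones) if upper else np.tril(ones)
    scale = v[:, None] if side == 0 else v[None, :]
    out = m.copy()
    out[mask] = (m * scale)[mask]
    return out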
def test_zero(self, mat, upper, preserve_diag, order):
    inpt1 = fix_mat(mat, dtype=mat.dtype, order=order, copy=True, numpy=True)
    inpt2 = inpt1.copy(order="K")

    k = 1 if preserve_diag else 0
    if upper:
        tri_fn = partial(np.triu, k=k)
    else:
        tri_fn = partial(np.tril, k=-k)

    mul_triang(inpt1, upper=upper, preserve_diag=preserve_diag, multiplier=0)
    assert np.sum(tri_fn(inpt1)) == 0

    if preserve_diag:
        zero_triang(inpt2, upper=upper)
        np.testing.assert_allclose(inpt1, inpt2)
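# Both `test_zero` variants check the same contract: `mul_triang` scales one
# triangle in place (skipping the diagonal when `preserve_diag=True`), and
# `zero_triang(x, upper)` behaves like `mul_triang(x, upper, preserve_diag=True,
# multiplier=0)`. A minimal in-place NumPy reference (our own sketch):
def _mul_triang_reference(x, upper, preserve_diag, multiplier):
    ones = np.ones_like(x, dtype=bool)
    k = 1 if preserve_diag else 0
    mask = np.triu(ones, k=k) if upper else np.tril(ones, k=-k)
    x[mask] *= multiplier  # modifies `x` in place, like the library call
    return x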
def convert(dtype, order=None, sparse=False):
    if sparse:
        return s_B[0].to(dtype=numpy_to_torch_type(dtype))
    return fix_mat(B, dtype=dtype, order=order)
def convert(dtype, order):
    return fix_mat(w, dtype=dtype, order=order)
def getter(order, dtype, device="cpu"):
    return fix_mat(matrix, dtype=dtype, order=order, copy=True, device=device)
def getter(order, dtype):
    return fix_mat(matrix, dtype=dtype, order=order, copy=True)
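# The `convert`/`getter` closures above all funnel through `fix_mat`. Judging
# from its call sites in this file, it normalizes a matrix fixture to a given
# dtype and memory order, optionally copying, moving to a device, or returning
# a NumPy array instead of a torch tensor. A rough sketch of that behaviour
# (ours, not the real helper; the real one likely handles more cases):
def _fix_mat_sketch(mat, dtype, order, copy=True, numpy=False, device="cpu"):
    arr = mat.numpy() if isinstance(mat, torch.Tensor) else mat
    out = arr.astype(dtype, order=order, copy=copy)  # cast + enforce layout
    if numpy:
        return out
    return torch.from_numpy(out).to(device=device)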