def test_large(self):
    """Benchmark ``vec_mul_triang`` on CPU and CUDA and check both agree.

    A matrix built by ``gen_random(t, t, ...)`` and a flattened vector built
    by ``gen_random(t, 1, ...)`` (``t = 20_000``, float32) are passed to
    ``vec_mul_triang`` ``num_rep`` times on each device. The minimum elapsed
    time per device is printed, and the outputs of the last CPU and CUDA
    calls are compared with ``np.testing.assert_allclose``.
    """
    t = 20_000
    num_rep = 5
    mat = torch.from_numpy(gen_random(t, t, np.float32, F=False, seed=123))
    vec = torch.from_numpy(
        gen_random(t, 1, np.float32, F=False, seed=124).reshape((-1, )))
    mat_cuda = mat.cuda()
    vec_cuda = vec.cuda()

    # perf_counter() is monotonic and high-resolution, which makes it the
    # appropriate clock for measuring short elapsed intervals.
    cpu_times = []
    for _ in range(num_rep):
        t_s = time.perf_counter()
        out_cpu = vec_mul_triang(mat, vec, True, 1)
        cpu_times.append(time.perf_counter() - t_s)

    gpu_times = []
    for _ in range(num_rep):
        t_s = time.perf_counter()
        out_cuda = vec_mul_triang(mat_cuda, vec_cuda, True, 1)
        # Wait for queued CUDA work to finish before reading the clock so
        # the measurement covers the computation and not just its launch.
        torch.cuda.synchronize()
        gpu_times.append(time.perf_counter() - t_s)

    print("mat size %d - t_cpu: %.4fs -- t_cuda: %.4fs" %
          (t, np.min(cpu_times), np.min(gpu_times)))
    np.testing.assert_allclose(out_cpu, out_cuda.cpu().numpy())
def test_large(self):
    """Time ``vec_mul_triang`` against a full in-place ``mat *= vec``.

    Both operations run once on a float64 matrix built by
    ``gen_random(t, t, ...)`` with ``t = 30_000`` and a flattened vector
    built by ``gen_random(t, 1, ...)``. Only the two timings are printed;
    this test makes no assertions about the numerical results.
    """
    t = 30_000
    mat = gen_random(t, t, np.float64, F=False, seed=123)
    vec = gen_random(t, 1, np.float64, F=False, seed=124).reshape((-1, ))

    # perf_counter() is monotonic and high-resolution, which makes it the
    # appropriate clock for measuring elapsed intervals.
    t_s = time.perf_counter()
    vec_mul_triang(mat, vec, upper=True, side=1)
    t_tri = time.perf_counter() - t_s

    # Reference timing: broadcasted in-place multiplication of the whole
    # matrix. Kept after the call above, exactly as in the original order.
    t_s = time.perf_counter()
    mat *= vec
    t_full = time.perf_counter() - t_s

    print("Our took %.2fs -- Full took %.2fs" % (t_tri, t_full))
def mat(self):
    """Return a torch tensor wrapping a square ``gen_random`` array.

    Both dimensions are ``TestVecMulTriang.MAT_SIZE``; the array is
    requested with dtype ``'float64'`` and seed 91.
    """
    size = TestVecMulTriang.MAT_SIZE
    generated = gen_random(size, size, 'float64', False, seed=91)
    return torch.from_numpy(generated)
def test_gaussian_pd():
    """Check that a jittered Gaussian kernel matrix has a Cholesky factor.

    The kernel ``GaussianKernel(sigma=10.0)`` is evaluated on a data set
    against itself, ``1e-7 * n`` is added to the diagonal (``n`` being the
    number of rows of the data), and ``np.linalg.cholesky`` is called on the
    result. The test passes if that call does not raise.
    """
    X = gen_random(10000, 2, 'float32', F=True, seed=12)
    Xt = torch.from_numpy(X)
    sigma = 10.0
    opt = FalkonOptions(compute_arch_speed=False, max_gpu_mem=1 * 2**30,
                        use_cpu=False, no_single_kernel=False)
    k = GaussianKernel(sigma, opt=opt)
    actual = k(Xt, Xt, opt=opt)
    # Add the jitter through the diagonal view, in place. This yields the
    # same values as adding `torch.eye(n) * jitter` but avoids allocating
    # a dense n x n identity matrix.
    actual.diagonal().add_(1e-7 * Xt.shape[0])
    # Test positive definite: cholesky raises LinAlgError otherwise.
    np.linalg.cholesky(actual)
def vec(self):
    """Return the ``gen_random`` array of size ``self.t`` by ``self.r``.

    Requested with ``np.float64``, ``F=True`` and seed 124.
    """
    n_rows, n_cols = self.t, self.r
    return gen_random(n_rows, n_cols, np.float64, F=True, seed=124)
def test_add_diag(F):
    """Check ``inplace_add_diag_th`` shifts the diagonal of a torch matrix.

    After adding ``10**6`` to a 1000 x 1000 ``gen_random`` matrix, every
    diagonal entry must lie strictly between ``10**5`` and ``20**6``.
    ``F`` is forwarded to ``gen_random`` unchanged.
    """
    diag = 10**6
    generated = gen_random(1000, 1000, 'float64', F=F, seed=10)
    A = torch.from_numpy(generated)
    falkon.preconditioner.pc_utils.inplace_add_diag_th(A, diag)

    above_lower = A.diagonal() > 10**5
    # NOTE(review): 20**6 is 64_000_000, a very loose upper bound relative
    # to diag = 10**6 - confirm this is the intended limit.
    below_upper = A.diagonal() < 20**6
    assert torch.all(above_lower & below_upper)
def rect(self):
    """Return the ``gen_random`` array of size ``self.t`` by ``2 * self.t - 1``.

    Requested with ``np.float64``, ``F=True`` and seed 12345.
    """
    n_rows = self.t
    n_cols = self.t * 2 - 1
    return gen_random(n_rows, n_cols, np.float64, F=True, seed=12345)
def mat(self):
    """Return the square ``gen_random`` array with both sizes ``self.t``.

    Requested with ``np.float64``, ``F=True`` and seed 123.
    """
    n_rows, n_cols = self.t, self.t
    return gen_random(n_rows, n_cols, np.float64, F=True, seed=123)
def dev_mat(request):
    """Return a 20 x 20 ``gen_random`` array as a CUDA tensor.

    ``request.param`` is forwarded as the ``F`` argument of ``gen_random``
    (presumably supplied by a parametrized pytest fixture - confirm against
    the decorator, which is not visible here). No seed is passed.
    """
    host_array = gen_random(20, 20, 'float32', F=request.param)
    host_tensor = torch.from_numpy(host_array)
    return host_tensor.cuda()
def mat(self):
    """Return the square ``gen_random`` array with both sizes ``self.t``.

    Requested with ``np.float32``, ``F=False`` and seed 123.
    """
    n_rows, n_cols = self.t, self.t
    return gen_random(n_rows, n_cols, np.float32, F=False, seed=123)
def B():
    """Return a torch tensor wrapping the ``gen_random(m, d, ...)`` array.

    ``m`` and ``d`` are read from the enclosing scope; the array is
    requested with dtype ``'float64'`` and seed 92.
    """
    generated = gen_random(m, d, 'float64', False, seed=92)
    return torch.from_numpy(generated)
def rowmaj_arr() -> torch.Tensor:
    """Return a torch tensor wrapping the ``gen_random(M, D, ...)`` array.

    ``M`` and ``D`` are read from the enclosing scope. The fourth positional
    argument is ``False`` (presumably selecting row-major layout, as the
    function name suggests - confirm against ``gen_random``). No seed is
    passed.
    """
    generated = gen_random(M, D, 'float64', False)
    return torch.from_numpy(generated)
def Ac():
    """Return the ``gen_random`` array of size 4000 by 10.

    Requested with dtype ``'float64'``, fourth positional argument ``False``
    and seed 92.
    """
    shape = (4000, 10)
    return gen_random(*shape, 'float64', False, seed=92)
def Bc():
    """Return the ``gen_random`` array of size 2000 by 10.

    Requested with dtype ``'float64'``, fourth positional argument ``False``
    and seed 92.
    """
    shape = (2000, 10)
    return gen_random(*shape, 'float64', False, seed=92)
def A(n, d) -> torch.Tensor:
    """Return a torch tensor wrapping the ``gen_random(n, d, ...)`` array.

    Args:
        n: First size argument forwarded to ``gen_random``.
        d: Second size argument forwarded to ``gen_random``.

    The array is requested with dtype ``'float64'`` and seed 92.
    """
    generated = gen_random(n, d, 'float64', False, seed=92)
    return torch.from_numpy(generated)
def Ac():
    """Return the ``gen_random(n, d, ...)`` array.

    ``n`` and ``d`` are read from the enclosing scope; the array is
    requested with dtype ``'float64'`` and seed 92.
    """
    generated = gen_random(n, d, 'float64', False, seed=92)
    return generated
def Bc():
    """Return the ``gen_random(m, d, ...)`` array.

    ``m`` and ``d`` are read from the enclosing scope; the array is
    requested with dtype ``'float64'`` and seed 92.
    """
    generated = gen_random(m, d, 'float64', False, seed=92)
    return generated
def vec_rhs(self, request):
    """Return a torch tensor wrapping a ``gen_random`` right-hand side.

    The array has sizes ``self.t`` by ``request.param`` (presumably supplied
    by a parametrized pytest fixture - confirm against the decorator, which
    is not visible here) and is requested with dtype ``'float64'``,
    ``F=False`` and seed 9.
    """
    n_cols = request.param
    generated = gen_random(self.t, n_cols, 'float64', F=False, seed=9)
    return torch.from_numpy(generated)
def data(self):
    """Return a torch tensor wrapping the ``gen_random`` data array.

    The array has sizes ``self.N`` by ``self.D`` and is requested with dtype
    ``'float64'``, ``F=False`` and seed 10.
    """
    n_rows, n_cols = self.N, self.D
    generated = gen_random(n_rows, n_cols, 'float64', F=False, seed=10)
    return torch.from_numpy(generated)
def Af():
    """Return a 4000 by 10 ``gen_random`` array in Fortran memory order.

    The array is requested with dtype ``'float64'`` and seed 92, then passed
    through ``np.asfortranarray``.
    """
    shape = (4000, 10)
    generated = gen_random(*shape, 'float64', False, seed=92)
    return np.asfortranarray(generated)
def v() -> torch.Tensor:
    """Return a torch tensor wrapping the ``gen_random(m, t, ...)`` array.

    ``m`` and ``t`` are read from the enclosing scope; the array is
    requested with dtype ``'float64'`` and seed 92.
    """
    generated = gen_random(m, t, 'float64', False, seed=92)
    return torch.from_numpy(generated)
def Bf():
    """Return a 2000 by 10 ``gen_random`` array in Fortran memory order.

    The array is requested with dtype ``'float64'`` and seed 92, then passed
    through ``np.asfortranarray``.
    """
    shape = (2000, 10)
    generated = gen_random(*shape, 'float64', False, seed=92)
    return np.asfortranarray(generated)
def mat():
    """Return the square ``gen_random(M, M, ...)`` array.

    ``M`` is read from the enclosing scope; the array is requested with
    dtype ``'float64'``, ``F=True`` and seed 10.
    """
    side = M
    return gen_random(side, side, 'float64', F=True, seed=10)
def w():
    """Return a torch tensor wrapping the ``gen_random(n, t, ...)`` array.

    ``n`` and ``t`` are read from the enclosing scope; the array is
    requested with dtype ``'float64'`` and seed 92.
    """
    generated = gen_random(n, t, 'float64', False, seed=92)
    return torch.from_numpy(generated)
def arr():
    """Return the ``gen_random(M, T, ...)`` array.

    ``M`` and ``T`` are read from the enclosing scope; the array is
    requested with dtype ``'float64'``, ``F=True`` and seed 12.
    """
    generated = gen_random(M, T, 'float64', F=True, seed=12)
    return generated
def colmaj_arr() -> torch.Tensor:
    """Return a torch tensor wrapping the ``gen_random(M, D, ...)`` array.

    ``M`` and ``D`` are read from the enclosing scope. The fourth positional
    argument is ``True`` (presumably selecting column-major layout, as the
    function name suggests - confirm against ``gen_random``). No seed is
    passed.
    """
    generated = gen_random(M, D, 'float64', True)
    return torch.from_numpy(generated)
def test_add_diag(F):
    """Check ``inplace_add_diag`` shifts the diagonal of a numpy matrix.

    After adding ``10**6`` to a 1000 x 1000 ``gen_random`` matrix, every
    diagonal entry must exceed ``10**5``. ``F`` is forwarded to
    ``gen_random`` unchanged.
    """
    diag = 10**6
    A = gen_random(1000, 1000, 'float64', F=F, seed=10)
    falkon.preconditioner.pc_utils.inplace_add_diag(A, diag)

    diagonal_is_large = A.diagonal() > 10**5
    assert np.all(diagonal_is_large)