def setUpClass(self): self.env_orig = os.getenv('OPTKIT_USE_LOCALLIBS', '0') os.environ['OPTKIT_USE_LOCALLIBS'] = '1' self.libs = DenseLinsysLibs() self.A_test = self.A_test_gen
class DenseLibsTestCase(OptkitCTestCase):
    """Smoke tests for loading the dense linear-system backend libraries.

    Verifies that at least one (gpu, precision) library variant loads, that
    each loaded variant exposes the expected ctypes type aliases and struct
    types, and that BLAS handle creation / device reset / version queries
    succeed.
    """

    @classmethod
    def setUpClass(self):
        # Force loading of locally built libraries; restore the original
        # environment value in tearDownClass.
        self.env_orig = os.getenv('OPTKIT_USE_LOCALLIBS', '0')
        os.environ['OPTKIT_USE_LOCALLIBS'] = '1'
        self.libs = DenseLinsysLibs()

    @classmethod
    def tearDownClass(self):
        os.environ['OPTKIT_USE_LOCALLIBS'] = self.env_orig

    def test_libs_exist(self):
        """At least one library variant must load."""
        libs = []
        for (gpu, single_precision) in self.CONDITIONS:
            libs.append(self.libs.get(
                    single_precision=single_precision, gpu=gpu))
        self.assertTrue( any(libs) )

    def test_lib_types(self):
        """Each loaded variant exposes the expected ctypes aliases/structs."""
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue

            self.assertTrue( 'ok_float' in dir(lib) )
            self.assertTrue( 'ok_int' in dir(lib) )
            self.assertTrue( 'c_int_p' in dir(lib) )
            self.assertTrue( 'ok_float_p' in dir(lib) )
            self.assertTrue( 'ok_int_p' in dir(lib) )
            self.assertTrue( 'vector' in dir(lib) )
            self.assertTrue( 'vector_p' in dir(lib) )
            self.assertTrue( 'matrix' in dir(lib) )
            self.assertTrue( 'matrix_p' in dir(lib) )
            # float width of the binding must match the requested precision
            self.assertTrue( single_precision == (lib.ok_float == c_float) )
            self.assertTrue( lib.ok_int == c_int )

    def test_blas_handle(self):
        """BLAS handle can be created and destroyed."""
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue

            handle = c_void_p()
            # create
            self.assertCall( lib.blas_make_handle(byref(handle)) )
            # destroy
            self.assertCall( lib.blas_destroy_handle(handle) )

    def test_device_reset(self):
        """Device reset succeeds, alone and after allocate/deallocate."""
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue

            # reset
            self.assertCall( lib.ok_device_reset() )

            # allocate - deallocate - reset
            handle = c_void_p()
            self.assertCall( lib.blas_make_handle(byref(handle)) )
            self.assertCall( lib.blas_destroy_handle(handle) )
            self.assertCall( lib.ok_device_reset() )

    def test_version(self):
        """Library reports a nonzero version string."""
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue

            major = c_int()
            minor = c_int()
            change = c_int()
            status = c_int()

            lib.optkit_version(byref(major), byref(minor), byref(change),
                               byref(status))
            version = self.version_string(major.value, minor.value,
                                          change.value, status.value)

            self.assertNotEqual( version, '0.0.0' )
            if self.VERBOSE_TEST:
                print("denselib version", version)
class DenseBLASTestCase(OptkitCTestCase):
    """Tests for the dense BLAS level 1/2/3 wrappers.

    Each test runs the C routine under every available (gpu, precision)
    library variant (and, for level 2/3, both row- and column-major storage)
    and compares against the equivalent numpy computation, with tolerances
    loosened for single precision / GPU.
    """

    @classmethod
    def setUpClass(self):
        self.env_orig = os.getenv('OPTKIT_USE_LOCALLIBS', '0')
        os.environ['OPTKIT_USE_LOCALLIBS'] = '1'
        self.libs = DenseLinsysLibs()
        self.A_test = self.A_test_gen

    @classmethod
    def tearDownClass(self):
        os.environ['OPTKIT_USE_LOCALLIBS'] = self.env_orig

    def setUp(self):
        pass

    def tearDown(self):
        # free any C-side allocations registered during the test, then run
        # registered exit calls (device resets)
        self.free_all_vars()
        self.exit_call()

    def test_blas1_dot(self):
        """blas_dot(v, w) matches numpy v.dot(w)."""
        (m, n) = self.shape
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 7 - 2 * single_precision
            TOL = 10**(-DIGITS)

            hdl = self.register_blas_handle(lib, 'hdl')
            v, v_py, v_ptr = self.register_vector(lib, m, 'v')
            w, w_py, w_ptr = self.register_vector(lib, m, 'w')

            v_py += np.random.rand(m)
            w_py += np.random.rand(m)
            lib.vector_memcpy_va(v, v_ptr, 1)
            lib.vector_memcpy_va(w, w_ptr, 1)

            answer = np.zeros(1).astype(lib.pyfloat)
            answer_p = answer.ctypes.data_as(lib.ok_float_p)

            self.assertCall( lib.blas_dot(hdl, v, w, answer_p) )
            self.assertTrue(
                    np.abs(answer[0] - v_py.dot(w_py)) <=
                    TOL + TOL * answer[0] )

            self.free_vars('v', 'w', 'hdl')
            self.assertCall( lib.ok_device_reset() )

    def test_blas1_nrm2(self):
        """blas_nrm2(v) matches numpy 2-norm."""
        (m, n) = self.shape
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 7 - 2 * single_precision
            TOL = 10**(-DIGITS)

            hdl = self.register_blas_handle(lib, 'hdl')
            v, v_py, v_ptr = self.register_vector(lib, m, 'v')

            v_py += np.random.rand(m)
            lib.vector_memcpy_va(v, v_ptr, 1)

            answer = np.zeros(1).astype(lib.pyfloat)
            answer_p = answer.ctypes.data_as(lib.ok_float_p)

            self.assertCall( lib.blas_nrm2(hdl, v, answer_p) )
            self.assertScalarEqual( answer[0], np.linalg.norm(v_py), TOL )

            self.free_vars('v', 'hdl')
            self.assertCall( lib.ok_device_reset() )

    def test_blas1_asum(self):
        """blas_asum(v) matches numpy 1-norm."""
        (m, n) = self.shape
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 7 - 2 * single_precision
            TOL = 10**(-DIGITS)

            hdl = self.register_blas_handle(lib, 'hdl')
            v, v_py, v_ptr = self.register_vector(lib, m, 'v')

            v_py += np.random.rand(m)
            self.assertCall( lib.vector_memcpy_va(v, v_ptr, 1) )

            answer = np.zeros(1).astype(lib.pyfloat)
            answer_p = answer.ctypes.data_as(lib.ok_float_p)

            self.assertCall( lib.blas_asum(hdl, v, answer_p) )
            self.assertScalarEqual( answer[0], np.linalg.norm(v_py, 1), TOL )

            self.free_vars('v', 'hdl')
            self.assertCall( lib.ok_device_reset() )

    def test_blas1_scal(self):
        """blas_scal scales a vector: v <- alpha * v."""
        (m, n) = self.shape
        v_rand = np.random.rand(m)
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 7 - 2 * single_precision
            TOL = 10**(-DIGITS)

            hdl = self.register_blas_handle(lib, 'hdl')
            v, v_py, v_ptr = self.register_vector(lib, m, 'v')

            v_py += v_rand
            self.assertCall( lib.vector_memcpy_va(v, v_ptr, 1) )

            alpha = np.random.rand()
            self.assertCall( lib.blas_scal(hdl, alpha, v) )
            self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
            self.assertVecEqual( v_py, alpha * v_rand, TOL * m**0.5, TOL )

            self.free_vars('v', 'hdl')
            self.assertCall( lib.ok_device_reset() )

    def test_blas1_axpy(self):
        """blas_axpy computes w <- alpha * v + w."""
        (m, n) = self.shape
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 7 - 2 * single_precision
            TOL = 10**(-DIGITS)

            hdl = self.register_blas_handle(lib, 'hdl')
            v, v_py, v_ptr = self.register_vector(lib, m, 'v')
            w, w_py, w_ptr = self.register_vector(lib, m, 'w')

            v_py += np.random.rand(m)
            w_py += np.random.rand(m)
            alpha = np.random.rand()
            pyresult = alpha * v_py + w_py

            self.assertCall( lib.vector_memcpy_va(v, v_ptr, 1) )
            self.assertCall( lib.vector_memcpy_va(w, w_ptr, 1) )
            self.assertCall( lib.blas_axpy(hdl, alpha, v, w) )
            self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
            self.assertVecEqual( w_py, pyresult, TOL * m**0.5, TOL )

            self.free_vars('v', 'w', 'hdl')
            self.assertCall( lib.ok_device_reset() )

    def test_blas2_gemv(self):
        """blas_gemv computes y <- alpha*A*x + beta*y (and the transpose)."""
        (m, n) = self.shape
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 7 - 2 * single_precision
            TOL = 10**(-DIGITS)

            for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
                hdl = self.register_blas_handle(lib, 'hdl')

                # make A, x, y
                A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')
                x, x_py, x_ptr = self.register_vector(lib, n, 'x')
                y, y_py, y_ptr = self.register_vector(lib, m, 'y')

                # populate A, x, y (in Py and C)
                A_py += self.A_test
                x_py += np.random.rand(n)
                y_py += np.random.rand(m)
                self.assertCall( lib.vector_memcpy_va(x, x_ptr, 1) )
                self.assertCall( lib.vector_memcpy_va(y, y_ptr, 1) )
                self.assertCall( lib.matrix_memcpy_ma(A, A_ptr, order) )

                # perform y = alpha * A * x + beta * y
                alpha = -0.5 + np.random.rand()
                beta = -0.5 + np.random.rand()

                pyresult = alpha * A_py.dot(x_py) + beta * y_py
                self.assertCall( lib.blas_gemv(hdl, lib.enums.CblasNoTrans,
                                               alpha, A, x, beta, y) )
                self.assertCall( lib.vector_memcpy_av(y_ptr, y, 1) )
                self.assertVecEqual( y_py, pyresult, TOL * m**0.5, TOL )

                # perform x = alpha * A' * y + beta * x
                y_py[:] = pyresult[:]
                pyresult = alpha * A_py.T.dot(y_py) + beta * x_py
                self.assertCall( lib.blas_gemv(hdl, lib.enums.CblasTrans,
                                               alpha, A, y, beta, x) )
                self.assertCall( lib.vector_memcpy_av(x_ptr, x, 1) )
                self.assertVecEqual( x_py, pyresult, TOL * n**0.5, TOL )

                self.free_vars('A', 'x', 'y', 'hdl')
                self.assertCall( lib.ok_device_reset() )

    def test_blas2_trsv(self):
        """blas_trsv solves L x = b for lower-triangular L."""
        (m, n) = self.shape

        # generate lower triangular matrix L
        L_test = self.A_test.T.dot(self.A_test)

        # normalize L so inversion doesn't blow up
        L_test /= np.linalg.norm(L_test)

        for i in range(n):
            # diagonal entries ~ 1 to keep condition number reasonable
            L_test[i, i] /= 10**np.log(n)
            L_test[i, i] += 1
            # upper triangle = 0
            for j in range(n):
                if j > i:
                    L_test[i, j] *= 0

        x_rand = np.random.rand(n)

        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 7 - 2 * single_precision
            TOL = 10**(-DIGITS)

            for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
                hdl = self.register_blas_handle(lib, 'hdl')

                # make L, x
                L, L_py, L_ptr = self.register_matrix(lib, n, n, order, 'L')
                x, x_py, x_ptr = self.register_vector(lib, n, 'x')

                # populate L, x
                L_py += L_test
                x_py += x_rand
                self.assertCall( lib.vector_memcpy_va(x, x_ptr, 1) )
                self.assertCall( lib.matrix_memcpy_ma(L, L_ptr, order) )

                # y = inv(L) * x
                pyresult = np.linalg.solve(L_test, x_rand)
                self.assertCall( lib.blas_trsv(hdl, lib.enums.CblasLower,
                                               lib.enums.CblasNoTrans,
                                               lib.enums.CblasNonUnit, L, x) )
                self.assertCall( lib.vector_memcpy_av(x_ptr, x, 1) )
                self.assertVecEqual( x_py, pyresult, TOL * n**0.5, TOL )

                self.free_vars('L', 'x', 'hdl')
                self.assertCall( lib.ok_device_reset() )

    def test_blas2_sbmv(self):
        """blas_sbmv: symmetric banded y <- alpha*S*x + beta*y."""
        (m, n) = self.shape
        diags = max(1, min(4, min(m, n) - 1))
        s_test = np.random.rand(n * diags)
        x_rand = np.random.rand(n)
        y_rand = np.random.rand(n)

        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 7 - 2 * single_precision
            TOL = 10**(-DIGITS)

            hdl = self.register_blas_handle(lib, 'hdl')

            # make symmetric banded "matrix" S stored as vector s,
            # and vectors x, y
            s, s_py, s_ptr = self.register_vector(lib, n * diags, 's')
            x, x_py, x_ptr = self.register_vector(lib, n, 'x')
            y, y_py, y_ptr = self.register_vector(lib, n, 'y')

            # populate vectors
            s_py += s_test
            x_py += x_rand
            y_py += y_rand
            self.assertCall( lib.vector_memcpy_va(s, s_ptr, 1) )
            self.assertCall( lib.vector_memcpy_va(x, x_ptr, 1) )
            self.assertCall( lib.vector_memcpy_va(y, y_ptr, 1) )

            # y = alpha
            alpha = np.random.rand()
            beta = np.random.rand()

            # reference result: expand the packed band storage by hand
            pyresult = np.zeros(n)
            for d in range(diags):
                for j in range(n - d):
                    if d > 0:
                        pyresult[d + j] += s_test[d + diags * j] * x_rand[j]
                    pyresult[j] += s_test[d + diags * j] * x_rand[d + j]
            pyresult *= alpha
            pyresult += beta * y_py

            self.assertCall( lib.blas_sbmv(hdl, lib.enums.CblasColMajor,
                                           lib.enums.CblasLower, diags - 1,
                                           alpha, s, x, beta, y) )
            self.assertCall( lib.vector_memcpy_av(y_ptr, y, 1) )
            self.assertVecEqual( y_py, pyresult, TOL * m**0.5, TOL )

            self.free_vars('x', 'y', 's', 'hdl')
            self.assertCall( lib.ok_device_reset() )

    def test_diagmv(self):
        """blas_diagmv: diagonal y <- alpha*D*x + beta*y."""
        (m, n) = self.shape
        d_test = np.random.rand(n)
        x_rand = np.random.rand(n)
        y_rand = np.random.rand(n)  # NOTE(review): unused in original

        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 7 - 2 * single_precision
            TOL = 10**(-DIGITS)

            hdl = self.register_blas_handle(lib, 'hdl')

            # make diagonal "matrix" D stored as vector d,
            # and vectors x, y
            d, d_py, d_ptr = self.register_vector(lib, n, 'd')
            x, x_py, x_ptr = self.register_vector(lib, n, 'x')
            y, y_py, y_ptr = self.register_vector(lib, n, 'y')

            # populate vectors
            d_py += d_test
            x_py += x_rand
            y_py += 2
            self.assertCall( lib.vector_memcpy_va(d, d_ptr, 1) )
            self.assertCall( lib.vector_memcpy_va(x, x_ptr, 1) )
            self.assertCall( lib.vector_memcpy_va(y, y_ptr, 1) )

            # y = alpha * D * x + beta * y
            alpha = np.random.rand()
            beta = np.random.rand()
            pyresult = alpha * d_py * x_py + beta * y_py

            self.assertCall( lib.blas_diagmv(hdl, alpha, d, x, beta, y) )
            self.assertCall( lib.vector_memcpy_av(y_ptr, y, 1) )
            self.assertVecEqual( y_py, pyresult, TOL * m**0.5, TOL )

            self.free_vars('x', 'y', 'd', 'hdl')
            self.assertCall( lib.ok_device_reset() )

    def test_blas3_gemm(self):
        """blas_gemm computes C <- alpha*B'A + beta*C."""
        (m, n) = self.shape
        B_test = np.random.rand(m, n)
        C_test = np.random.rand(n, n)

        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 7 - 2 * single_precision - 1 * gpu
            RTOL = 10**(-DIGITS)
            ATOLMN = RTOL * (m * n)**0.5

            for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
                hdl = self.register_blas_handle(lib, 'hdl')

                # allocate A, B
                A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')
                B, B_py, B_ptr = self.register_matrix(lib, m, n, order, 'B')
                C, C_py, C_ptr = self.register_matrix(lib, n, n, order, 'C')

                # populate
                A_py += self.A_test
                B_py += B_test
                C_py += C_test
                self.assertCall( lib.matrix_memcpy_ma(A, A_ptr, order) )
                self.assertCall( lib.matrix_memcpy_ma(B, B_ptr, order) )
                self.assertCall( lib.matrix_memcpy_ma(C, C_ptr, order) )

                # perform C = alpha * B'A + beta * C
                alpha = np.random.rand()
                beta = np.random.rand()
                pyresult = alpha * B_py.T.dot(A_py) + beta * C_py

                self.assertCall( lib.blas_gemm(hdl, lib.enums.CblasTrans,
                                               lib.enums.CblasNoTrans, alpha,
                                               B, A, beta, C) )
                self.assertCall( lib.matrix_memcpy_am(C_ptr, C, order) )
                self.assertVecEqual( C_py, pyresult, ATOLMN, RTOL )

                self.free_vars('A', 'B', 'C', 'hdl')
                self.assertCall( lib.ok_device_reset() )

    def test_blas3_syrk(self):
        """blas_syrk: symmetric rank-k update B <- alpha*A'A + beta*B
        (lower triangle only)."""
        (m, n) = self.shape
        B_test = np.random.rand(n, n)

        # make B symmetric
        B_test = B_test.T.dot(B_test)

        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 7 - 2 * single_precision
            TOL = 10**(-DIGITS)

            for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
                hdl = self.register_blas_handle(lib, 'hdl')

                # allocate A, B
                A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')
                B, B_py, B_ptr = self.register_matrix(lib, n, n, order, 'B')

                # populate
                A_py += self.A_test[:, :]
                B_py += B_test
                self.assertCall( lib.matrix_memcpy_ma(A, A_ptr, order) )
                self.assertCall( lib.matrix_memcpy_ma(B, B_ptr, order) )

                # B = alpha * (A'A) + beta * B
                alpha = np.random.rand()
                beta = np.random.rand()
                pyresult = alpha * A_py.T.dot(A_py) + beta * B_py

                self.assertCall( lib.blas_syrk(hdl, lib.enums.CblasLower,
                                               lib.enums.CblasTrans, alpha, A,
                                               beta, B) )
                self.assertCall( lib.matrix_memcpy_am(B_ptr, B, order) )

                # syrk only writes the lower triangle; zero the upper
                # triangle in both results before comparing
                for i in range(n):
                    for j in range(n):
                        if j > i:
                            pyresult[i, j] *= 0
                            B_py[i, j] *= 0
                self.assertVecEqual( B_py, pyresult, TOL * n, TOL )

                self.free_vars('A', 'B', 'hdl')
                self.assertCall( lib.ok_device_reset() )

    def test_blas3_trsm(self):
        """blas_trsm solves A <- A * inv(L) for lower-triangular L
        (CPU variants only)."""
        (m, n) = self.shape

        # make square, invertible L
        L_test = np.random.rand(n, n)
        for i in range(n):
            L_test[i, i] /= 10**np.log(n)
            L_test[i, i] += 1
            for j in range(n):
                if j > i:
                    L_test[i, j] *= 0

        for (gpu, single_precision) in self.CONDITIONS:
            if gpu:
                continue
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 7 - 2 * single_precision
            RTOL = 10**(-DIGITS)
            ATOLMN = RTOL * (m * n)**0.5

            for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
                hdl = self.register_blas_handle(lib, 'hdl')

                # allocate A, L
                A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')
                L, L_py, L_ptr = self.register_matrix(lib, n, n, order, 'L')

                # populate
                A_py += self.A_test
                L_py += L_test
                self.assertCall( lib.matrix_memcpy_ma(A, A_ptr, order) )
                self.assertCall( lib.matrix_memcpy_ma(L, L_ptr, order) )

                # A = A * inv(L)
                pyresult = A_py.dot(np.linalg.inv(L_test))
                self.assertCall( lib.blas_trsm(hdl, lib.enums.CblasRight,
                                               lib.enums.CblasLower,
                                               lib.enums.CblasNoTrans,
                                               lib.enums.CblasNonUnit,
                                               1., L, A) )
                self.assertCall( lib.matrix_memcpy_am(A_ptr, A, order) )
                self.assertVecEqual( A_py, pyresult, ATOLMN, RTOL )

                self.free_vars('A', 'L', 'hdl')
                self.assertCall( lib.ok_device_reset() )
class DenseLinalgTestCase(OptkitCTestCase):
    """Tests for the dense linear-algebra helpers: Cholesky factor/solve,
    row/column squares, vector broadcasting over matrices, and row/column
    reductions (min/max/argmin)."""

    @classmethod
    def setUpClass(self):
        self.env_orig = os.getenv('OPTKIT_USE_LOCALLIBS', '0')
        os.environ['OPTKIT_USE_LOCALLIBS'] = '1'
        self.libs = DenseLinsysLibs()
        self.A_test = self.A_test_gen

    @classmethod
    def tearDownClass(self):
        os.environ['OPTKIT_USE_LOCALLIBS'] = self.env_orig

    def setUp(self):
        pass

    def tearDown(self):
        self.free_all_vars()
        self.exit_call()

    def test_cholesky(self):
        """linalg_cholesky_decomp/svx match numpy cholesky/solve."""
        (m, n) = self.shape
        mindim = min(m, n)

        # build decently conditioned symmetric matrix
        AA_test = self.A_test.T.dot(self.A_test)[:mindim, :mindim]
        AA_test /= np.linalg.norm(AA_test) * mindim**0.5

        for i in range(mindim):
            # diagonal entries ~ 1 to keep condition number reasonable
            AA_test[i, i] /= 10**np.log(mindim)
            AA_test[i, i] += 1
            # upper triangle = 0
            for j in range(mindim):
                if j > i:
                    AA_test[i, j] *= 0
        AA_test += AA_test.T

        x_rand = np.random.rand(mindim)
        pysol = np.linalg.solve(AA_test, x_rand)
        pychol = np.linalg.cholesky(AA_test)

        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
                hdl = self.register_blas_handle(lib, 'hdl')

                # allocate L, x
                L, L_py, L_ptr = self.register_matrix(
                        lib, mindim, mindim, order, 'L')
                x, x_py, x_ptr = self.register_vector(lib, mindim, 'x')

                # populate L
                L_py *= 0
                L_py += AA_test
                self.assertCall( lib.matrix_memcpy_ma(L, L_ptr, order) )

                # cholesky factorization
                self.assertCall( lib.linalg_cholesky_decomp(hdl, L) )
                self.assertCall( lib.matrix_memcpy_am(L_ptr, L, order) )
                # compare only the lower-triangular factor
                for i in range(mindim):
                    for j in range(mindim):
                        if j > i:
                            L_py[i, j] *= 0

                imprecision_factor = 5**(int(gpu) + int(single_precision))
                atol = 1e-2 * imprecision_factor * mindim
                rtol = 1e-2 * imprecision_factor
                self.assertVecEqual(
                        L_py.dot(x_rand), pychol.dot(x_rand), atol, rtol )

                # populate x
                x_py *= 0
                x_py += x_rand
                self.assertCall( lib.vector_memcpy_va(x, x_ptr, 1) )

                # cholesky solve
                self.assertCall( lib.linalg_cholesky_svx(hdl, L, x) )
                self.assertCall( lib.vector_memcpy_av(x_ptr, x, 1) )
                self.assertVecEqual( x_py, pysol, atol * mindim**0.5, rtol )

                self.free_vars('L', 'x', 'hdl')
                self.assertCall( lib.ok_device_reset() )

    def test_row_squares(self):
        """linalg_matrix_row_squares matches per-row / per-column dot
        products (column mode selected via CblasTrans)."""
        m, n = self.shape
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 7 - 5 * lib.FLOAT - 1 * lib.GPU
            RTOL = 10**(-DIGITS)
            ATOLM = RTOL * m**0.5
            ATOLN = RTOL * n**0.5

            for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
                # allocate A, r, c
                A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')
                c, c_py, c_ptr = self.register_vector(lib, n, 'c')
                r, r_py, r_ptr = self.register_vector(lib, m, 'r')

                A_py += self.A_test
                self.assertCall( lib.matrix_memcpy_ma(A, A_ptr, order) )

                py_rows = [A_py[i, :].dot(A_py[i, :]) for i in range(m)]
                py_cols = [A_py[:, j].dot(A_py[:, j]) for j in range(n)]

                # C: calculate row squares
                self.assertCall( lib.linalg_matrix_row_squares(
                        lib.enums.CblasNoTrans, A, r) )
                self.assertCall( lib.vector_memcpy_av(r_ptr, r, 1) )

                # compare C vs Python results
                self.assertVecEqual( r_py, py_rows, ATOLM, RTOL )

                # C: calculate column squares
                self.assertCall( lib.linalg_matrix_row_squares(
                        lib.enums.CblasTrans, A, c) )
                self.assertCall( lib.vector_memcpy_av(c_ptr, c, 1) )

                # compare C vs Python results
                self.assertVecEqual( c_py, py_cols, ATOLN, RTOL )

                # free memory
                self.free_vars('A', 'r', 'c')
                self.assertCall( lib.ok_device_reset() )

    def test_broadcast(self):
        """linalg_matrix_broadcast_vector: scale/add a vector across the
        rows or columns of A; verified via gemv against numpy."""
        (m, n) = self.shape
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 7 - 5 * lib.FLOAT - 1 * lib.GPU
            RTOL = 10**(-DIGITS)
            ATOLM = RTOL * m**0.5

            for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
                hdl = self.register_blas_handle(lib, 'hdl')

                # allocate A, d, e, x, y
                A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')
                d, d_py, d_ptr = self.register_vector(lib, m, 'd')
                e, e_py, e_ptr = self.register_vector(lib, n, 'e')
                x, x_py, x_ptr = self.register_vector(lib, n, 'x')
                y, y_py, y_ptr = self.register_vector(lib, m, 'y')

                A_py += self.A_test
                d_py += np.random.rand(m)
                e_py += np.random.rand(n)
                x_py += np.random.rand(n)
                self.assertCall( lib.matrix_memcpy_ma(A, A_ptr, order) )
                self.assertCall( lib.vector_memcpy_va(d, d_ptr, 1) )
                self.assertCall( lib.vector_memcpy_va(e, e_ptr, 1) )
                self.assertCall( lib.vector_memcpy_va(x, x_ptr, 1) )

                # A = A * diag(E)
                self.assertCall( lib.linalg_matrix_broadcast_vector(A, e,
                        lib.enums.OkTransformScale, lib.enums.CblasRight) )
                self.assertCall( lib.blas_gemv(hdl, lib.enums.CblasNoTrans,
                                               1, A, x, 0, y) )
                self.assertCall( lib.vector_memcpy_av(y_ptr, y, 1) )
                Ax = y_py
                AEx = A_py.dot(e_py * x_py)
                self.assertVecEqual( Ax, AEx, ATOLM, RTOL )

                # A = diag(D) * A
                self.assertCall( lib.linalg_matrix_broadcast_vector(A, d,
                        lib.enums.OkTransformScale, lib.enums.CblasLeft) )
                self.assertCall( lib.blas_gemv(hdl, lib.enums.CblasNoTrans,
                                               1, A, x, 0, y) )
                self.assertCall( lib.vector_memcpy_av(y_ptr, y, 1) )
                Ax = y_py
                DAEx = d_py * AEx
                self.assertVecEqual( Ax, DAEx, ATOLM, RTOL )

                # A += 1e'
                self.assertCall( lib.linalg_matrix_broadcast_vector(A, e,
                        lib.enums.OkTransformAdd, lib.enums.CblasRight) )
                self.assertCall( lib.blas_gemv(hdl, lib.enums.CblasNoTrans,
                                               1, A, x, 0, y) )
                self.assertCall( lib.vector_memcpy_av(y_ptr, y, 1) )
                Ax = y_py
                A_updatex = DAEx + np.ones(m) * e_py.dot(x_py)
                self.assertVecEqual( Ax, A_updatex, ATOLM, RTOL )

                # A += d1'
                self.assertCall( lib.linalg_matrix_broadcast_vector(A, d,
                        lib.enums.OkTransformAdd, lib.enums.CblasLeft) )
                self.assertCall( lib.blas_gemv(hdl, lib.enums.CblasNoTrans,
                                               1, A, x, 0, y) )
                self.assertCall( lib.vector_memcpy_av(y_ptr, y, 1) )
                Ax = y_py
                A_updatex += d_py * sum(x_py)
                self.assertVecEqual( Ax, A_updatex, ATOLM, RTOL )

                # free memory
                self.free_vars('A', 'd', 'e', 'x', 'y', 'hdl')
                self.assertCall( lib.ok_device_reset() )

    def test_reduce(self):
        """linalg_matrix_reduce_{min,max,indmin} match numpy min/max/argmin
        along rows and columns."""
        (m, n) = self.shape
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 7 - 5 * lib.FLOAT - 1 * lib.GPU
            RTOL = 10**(-DIGITS)
            ATOLM = RTOL * m**0.5
            ATOLN = RTOL * n**0.5

            for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
                hdl = self.register_blas_handle(lib, 'hdl')

                # allocate A, d, e, x, y
                A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')
                d, d_py, d_ptr = self.register_vector(lib, m, 'd')
                e, e_py, e_ptr = self.register_vector(lib, n, 'e')
                x, x_py, x_ptr = self.register_vector(lib, n, 'x')
                y, y_py, y_ptr = self.register_vector(lib, m, 'y')

                A_py += self.A_test
                self.assertCall( lib.matrix_memcpy_ma(A, A_ptr, order) )
                x_py += np.random.rand(n)
                self.assertCall( lib.vector_memcpy_va(x, x_ptr, 1) )

                # min - reduce columns
                colmin = np.min(A_py, 0)
                self.assertCall( lib.linalg_matrix_reduce_min(e, A,
                        lib.enums.CblasLeft) )
                self.assertCall( lib.vector_memcpy_av(e_ptr, e, 1) )
                self.assertVecEqual( e_py, colmin, ATOLN, RTOL )

                # min - reduce rows
                rowmin = np.min(A_py, 1)
                self.assertCall( lib.linalg_matrix_reduce_min(d, A,
                        lib.enums.CblasRight) )
                self.assertCall( lib.vector_memcpy_av(d_ptr, d, 1) )
                self.assertVecEqual( d_py, rowmin, ATOLM, RTOL )

                # max - reduce columns
                colmax = np.max(A_py, 0)
                self.assertCall( lib.linalg_matrix_reduce_max(e, A,
                        lib.enums.CblasLeft) )
                self.assertCall( lib.vector_memcpy_av(e_ptr, e, 1) )
                self.assertVecEqual( e_py, colmax, ATOLN, RTOL )

                # max - reduce rows
                rowmax = np.max(A_py, 1)
                self.assertCall( lib.linalg_matrix_reduce_max(d, A,
                        lib.enums.CblasRight) )
                self.assertCall( lib.vector_memcpy_av(d_ptr, d, 1) )
                self.assertVecEqual( d_py, rowmax, ATOLM, RTOL )

                # indmin - reduce columns
                idx, inds, inds_ptr = self.register_indvector(lib, n, 'idx')
                self.assertCall( lib.linalg_matrix_reduce_indmin(idx, e, A,
                        lib.enums.CblasLeft) )
                self.assertCall( lib.indvector_memcpy_av(inds_ptr, idx, 1) )
                self.free_var('idx')
                # indices must select the column minima
                calcmin = np.array([A_py[inds[i], i] for i in range(n)])
                colmin = np.min(A_py, 0)
                self.assertVecEqual( calcmin, colmin, ATOLN, RTOL )

                # indmin - reduce rows
                idx, inds, inds_ptr = self.register_indvector(lib, m, 'idx')
                self.assertCall( lib.linalg_matrix_reduce_indmin(idx, d, A,
                        lib.enums.CblasRight) )
                self.assertCall( lib.indvector_memcpy_av(inds_ptr, idx, 1) )
                self.free_var('idx')
                # indices must select the row minima
                calcmin = np.array([A_py[i, inds[i]] for i in range(m)])
                rowmin = np.min(A_py, 1)
                self.assertVecEqual( calcmin, rowmin, ATOLM, RTOL )

                # free memory
                self.free_vars('A', 'd', 'e', 'x', 'y', 'hdl')
                self.assertCall( lib.ok_device_reset() )
class MatrixTestCase(OptkitCTestCase):
    """Tests for the dense matrix type: allocation, host<->device I/O,
    slicing (submatrix/row/column/diagonal), and elementwise math."""

    @classmethod
    def setUpClass(self):
        self.env_orig = os.getenv('OPTKIT_USE_LOCALLIBS', '0')
        os.environ['OPTKIT_USE_LOCALLIBS'] = '1'
        self.libs = DenseLinsysLibs()
        self.A_test = self.A_test_gen

    @classmethod
    def tearDownClass(self):
        os.environ['OPTKIT_USE_LOCALLIBS'] = self.env_orig

    def setUp(self):
        pass

    def tearDown(self):
        self.free_all_vars()
        self.exit_call()

    def test_alloc(self):
        """matrix_calloc sets dims/leading dimension and zero-fills."""
        (m, n) = self.shape
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
                A = lib.matrix(0, 0, 0, None, order)
                self.assertEqual(A.size1, 0)
                self.assertEqual(A.size2, 0)
                self.assertEqual(A.ld, 0)
                self.assertEqual(A.order, order)

                # calloc
                self.assertCall(lib.matrix_calloc(A, m, n, order))
                self.register_var('A', A, lib.matrix_free)
                self.assertEqual(A.size1, m)
                self.assertEqual(A.size2, n)
                # leading dimension follows storage order
                if order == lib.enums.CblasRowMajor:
                    self.assertEqual(A.ld, n)
                else:
                    self.assertEqual(A.ld, m)
                self.assertEqual(A.order, order)
                if not gpu:
                    # host memory is directly addressable; confirm zeros
                    for i in range(m * n):
                        self.assertEqual(A.data[i], 0)

                # free
                self.free_var('A')
                self.assertCall(lib.ok_device_reset())

    def test_io(self):
        """matrix_memcpy_{am,ma,mm}, matrix_view_array, matrix_set_all."""
        (m, n) = self.shape
        # copy: this test mutates A_rand in place (set_all section); without
        # the copy it would corrupt the shared class fixture self.A_test
        A_rand = self.A_test.copy()

        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 11 - 4 * lib.FLOAT - 1 * lib.GPU
            TOL = 10**(-DIGITS)

            for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
                A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')

                # memcpy_am
                # set A_py to A_rand. overwrite A_py with zeros from A
                A_py += A_rand
                self.assertCall(lib.matrix_memcpy_am(A_ptr, A, order))
                for i in range(m):
                    for j in range(n):
                        self.assertEqual(A_py[i, j], 0)

                # memcpy_ma
                A_py += A_rand
                self.assertCall(lib.matrix_memcpy_ma(A, A_ptr, order))
                A_py *= 0
                self.assertCall(lib.matrix_memcpy_am(A_ptr, A, order))
                self.assertVecEqual(A_py, A_rand, TOL, TOL)

                # memcpy_mm
                Z, Z_py, Z_ptr = self.register_matrix(lib, m, n, order, 'Z')
                self.assertCall(lib.matrix_memcpy_mm(Z, A, order))
                self.assertCall(lib.matrix_memcpy_am(Z_ptr, Z, order))
                self.assertVecEqual(Z_py, A_py, TOL, TOL)

                # view_array
                if not gpu:
                    A_py *= 0
                    B = lib.matrix(0, 0, 0, None, order)
                    self.assertCall( lib.matrix_view_array(
                            B, A_rand.ctypes.data_as(lib.ok_float_p), m, n,
                            order))
                    self.assertCall(lib.matrix_memcpy_am(A_ptr, A, order))
                    self.assertVecEqual(A_py, A_rand, TOL, TOL)
                    # DON'T FREE B, DATA OWNED BY PYTHON

                # set_all
                val = 2
                A_rand *= 0
                A_rand += val
                self.assertCall(lib.matrix_set_all(A, val))
                self.assertCall(lib.matrix_memcpy_am(A_ptr, A, order))
                self.assertVecEqual(A_py, A_rand, TOL, TOL)

                self.free_vars('A', 'Z')
                self.assertCall(lib.ok_device_reset())

    def test_slicing(self):
        """ matrix slicing tests """
        (m, n) = self.shape
        A_rand = self.A_test

        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 7 - 2 * lib.FLOAT - 1 * lib.GPU
            TOL = 10**(-DIGITS)

            for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
                pyorder = 'C' if order == lib.enums.CblasRowMajor else 'F'

                A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')

                # set A, A_py to A_rand
                A_py += A_rand
                self.assertCall(lib.matrix_memcpy_ma(A, A_ptr, order))

                # submatrix (floor division: offsets/sizes must be integers)
                m0 = m // 4
                n0 = n // 4
                msub = m // 2
                nsub = n // 2
                Asub = lib.matrix(0, 0, 0, None, order)
                Asub_py = np.zeros(
                        (msub, nsub), order=pyorder).astype(lib.pyfloat)
                Asub_ptr = Asub_py.ctypes.data_as(lib.ok_float_p)
                self.assertCall(
                        lib.matrix_submatrix(Asub, A, m0, n0, msub, nsub))
                self.assertCall(lib.matrix_memcpy_am(Asub_ptr, Asub, order))
                A_py_sub = A_py[m0:m0 + msub, n0:n0 + nsub]
                self.assertVecEqual(Asub_py, A_py_sub, TOL, TOL)

                # row
                v = lib.vector(0, 0, None)
                v_py = np.zeros(n).astype(lib.pyfloat)
                v_ptr = v_py.ctypes.data_as(lib.ok_float_p)
                self.assertCall(lib.matrix_row(v, A, m0))
                self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
                self.assertVecEqual(A_py[m0, :], v_py, TOL, TOL)

                # column
                v_py = np.zeros(m).astype(lib.pyfloat)
                v_ptr = v_py.ctypes.data_as(lib.ok_float_p)
                self.assertCall(lib.matrix_column(v, A, n0))
                self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
                self.assertVecEqual(A_py[:, n0], v_py, TOL, TOL)

                # diagonal
                v_py = np.zeros(min(m, n)).astype(lib.pyfloat)
                v_ptr = v_py.ctypes.data_as(lib.ok_float_p)
                self.assertCall(lib.matrix_diagonal(v, A))
                self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
                self.assertVecEqual(np.diag(A_py), v_py, TOL, TOL)

                self.free_var('A')
                self.assertCall(lib.ok_device_reset())

    def test_math(self):
        """ matrix math tests """
        (m, n) = self.shape
        # copy: this test scales/abs/pows A_rand in place; without the copy
        # it would corrupt the shared class fixture self.A_test
        A_rand = self.A_test.copy()

        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 7 - 2 * lib.FLOAT - 1 * lib.GPU
            RTOL = 10**(-DIGITS)
            ATOLMN = RTOL * (m * n)**0.5

            for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
                A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')

                # set A, A_py to A_rand
                A_py += A_rand
                self.assertCall(lib.matrix_memcpy_ma(A, A_ptr, order))

                # scale: A = alpha * A
                alpha = np.random.rand()
                A_rand *= alpha
                self.assertCall(lib.matrix_scale(A, alpha))
                self.assertCall(lib.matrix_memcpy_am(A_ptr, A, order))
                self.assertVecEqual(A_py, A_rand, ATOLMN, RTOL)

                # scale_left: A = diag(d) * A
                d, d_py, d_ptr = self.register_vector(lib, m, 'd')
                d_py[:] = np.random.rand(m)
                for i in range(m):
                    A_rand[i, :] *= d_py[i]
                self.assertCall(lib.vector_memcpy_va(d, d_ptr, 1))
                self.assertCall(lib.matrix_scale_left(A, d))
                self.assertCall(lib.matrix_memcpy_am(A_ptr, A, order))
                self.assertVecEqual(A_py, A_rand, ATOLMN, RTOL)

                # scale_right: A = A * diag(e)
                e, e_py, e_ptr = self.register_vector(lib, n, 'e')
                e_py[:] = np.random.rand(n)
                for j in range(n):
                    A_rand[:, j] *= e_py[j]
                self.assertCall(lib.vector_memcpy_va(e, e_ptr, 1))
                self.assertCall(lib.matrix_scale_right(A, e))
                self.assertCall(lib.matrix_memcpy_am(A_ptr, A, order))
                self.assertVecEqual(A_py, A_rand, ATOLMN, RTOL)

                # abs: A_ij = abs(A_ij)
                # shift entries to straddle zero so abs() is non-trivial
                A_rand -= (A_rand.max() - A_rand.min()) / 2
                A_py *= 0
                A_py += A_rand
                A_rand = np.abs(A_rand)
                self.assertCall(lib.matrix_memcpy_ma(A, A_ptr, order))
                self.assertCall(lib.matrix_abs(A))
                self.assertCall(lib.matrix_memcpy_am(A_ptr, A, order))
                self.assertVecEqual(A_py, A_rand, ATOLMN, RTOL)

                # pow
                p = 3 * np.random.rand()
                A_rand **= p
                self.assertCall(lib.matrix_pow(A, p))
                self.assertCall(lib.matrix_memcpy_am(A_ptr, A, order))
                self.assertVecEqual(A_py, A_rand, ATOLMN, RTOL)

                self.free_vars('d', 'e', 'A')
                self.assertCall(lib.ok_device_reset())
class VectorTestCase(OptkitCTestCase):
    """Exercise the dense-linsys C library's ``vector`` type via ctypes.

    Every test loops over each available (gpu, single_precision) library
    build listed in ``self.CONDITIONS`` and skips builds that are not
    compiled/installed (``lib is None``).

    NOTE(review): an identical class definition appears again later in this
    module; being defined later, that one is what unittest actually runs —
    confirm the duplication is intentional.
    """

    @classmethod
    def setUpClass(self):
        # Force loading of locally built C libraries for the duration of the
        # test class; the original environment value is restored in
        # tearDownClass.
        self.env_orig = os.getenv('OPTKIT_USE_LOCALLIBS', '0')
        os.environ['OPTKIT_USE_LOCALLIBS'] = '1'
        self.libs = DenseLinsysLibs()

    @classmethod
    def tearDownClass(self):
        # Restore the caller's OPTKIT_USE_LOCALLIBS setting.
        os.environ['OPTKIT_USE_LOCALLIBS'] = self.env_orig

    def tearDown(self):
        # Release any C-side allocations registered during the test, then run
        # deferred exit calls (e.g. device resets registered via
        # register_exit).
        self.free_all_vars()
        self.exit_call()

    def test_alloc(self):
        """vector_calloc / vector_free: size and stride bookkeeping."""
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            len_v = 10 + int(10 * np.random.rand())
            v = lib.vector(0, 0, None)
            # Uninitialized struct: zero size and stride.
            self.assertEqual(v.size, 0)
            self.assertEqual(v.stride, 0)

            self.assertCall(lib.vector_calloc(v, len_v))
            self.register_var('v', v, lib.vector_free)
            self.assertEqual(v.size, len_v)
            self.assertEqual(v.stride, 1)

            # Only CPU builds expose the raw buffer for direct inspection;
            # calloc'd memory must read back as zeros.
            if not gpu:
                for i in range(v.size):
                    self.assertEqual(v.data[i], 0)

            # Freeing resets the struct's metadata.
            self.free_var('v')
            self.assertEqual(v.size, 0)
            self.assertEqual(v.stride, 0)

    def test_io(self):
        """Host<->device transfer: set_all, memcpy_av/va/vv, view_array."""
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            len_v = 10 + int(1000 * np.random.rand())
            # bool arithmetic: 7 digits for double, 5 for single precision.
            DIGITS = 7 - 2 * single_precision
            RTOL = 10**(-DIGITS)
            ATOL = RTOL * len_v**0.5
            v, v_py, v_ptr = self.register_vector(lib, len_v, 'v')
            w, w_py, w_ptr = self.register_vector(lib, len_v, 'w')

            # set_all: fill device vector with a constant.
            set_val = 5
            self.assertCall(lib.vector_set_all(v, set_val))
            # memcpy_av: device -> host array.
            self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
            for i in range(len_v):
                self.assertEqual(v_py[i], set_val)

            # memcpy_va: host array -> device, then round-trip back.
            w_rand = np.random.rand(len_v)
            w_py[:] = w_rand[:]
            self.assertCall(lib.vector_memcpy_va(w, w_ptr, 1))
            w_py *= 0
            self.assertCall(lib.vector_memcpy_av(w_ptr, w, 1))
            self.assertVecEqual(w_py, w_rand, ATOL, RTOL)

            # memcpy_vv: device -> device copy.
            self.assertCall(lib.vector_memcpy_vv(v, w))
            self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
            self.assertVecEqual(v_py, w_rand, ATOL, RTOL)

            # view_array: wrap a python-owned buffer (CPU builds only).
            if not gpu:
                u_rand = np.random.rand(len_v).astype(lib.pyfloat)
                u_ptr = u_rand.ctypes.data_as(lib.ok_float_p)
                u = lib.vector(0, 0, None)
                self.assertCall(lib.vector_view_array(u, u_ptr, u_rand.size))
                self.assertCall(lib.vector_memcpy_av(v_ptr, u, 1))
                self.assertVecEqual(v_py, u_rand, ATOL, RTOL)
                # DON'T FREE u, DATA OWNED BY PYTHON

            self.free_vars('v', 'w')
            self.assertCall(lib.ok_device_reset())

    def test_subvector(self):
        """vector_subvector: a view sharing data with the parent vector."""
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            len_v = 10 + int(10 * np.random.rand())
            v, v_py, v_ptr = self.register_vector(lib, len_v, 'v')

            offset_sub = 3
            len_sub = 3
            v_sub = lib.vector(0, 0, None)
            self.assertCall(lib.vector_subvector(v_sub, v, offset_sub, len_sub))
            self.assertEqual(v_sub.size, 3)
            self.assertEqual(v_sub.stride, v.stride)

            # Copy both parent and view to host; the view must match the
            # corresponding slice of the parent.
            v_sub_py = np.zeros(len_sub).astype(lib.pyfloat)
            v_sub_ptr = v_sub_py.ctypes.data_as(lib.ok_float_p)
            self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
            self.assertCall(lib.vector_memcpy_av(v_sub_ptr, v_sub, 1))
            for i in range(len_sub):
                self.assertEqual(v_sub_py[i], v_py[i + offset_sub])

            self.free_var('v')
            self.assertCall(lib.ok_device_reset())

    def test_math(self):
        """Elementwise vector ops, tracked against scalar bookkeeping.

        v and w start at zero; every element of each vector always holds the
        same value, mirrored on the host in val1 (for v) and val2 (for w),
        so each op can be checked against a scalar. Statement order matters:
        each assertion depends on the device state left by the previous call.
        """
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            val1 = 12 * np.random.rand()
            val2 = 5 * np.random.rand()
            len_v = 10 + int(1000 * np.random.rand())
            # len_v = 10 + int(10 * np.random.rand())
            DIGITS = 7 - 2 * lib.FLOAT - 1 * lib.GPU
            RTOL = 10**(-DIGITS)
            ATOL = RTOL * len_v**0.5
            v, v_py, v_ptr = self.register_vector(lib, len_v, 'v')
            w, w_py, w_ptr = self.register_vector(lib, len_v, 'w')

            # constant addition
            self.assertCall(lib.vector_add_constant(v, val1))
            self.assertCall(lib.vector_add_constant(w, val2))
            self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
            self.assertCall(lib.vector_memcpy_av(w_ptr, w, 1))
            self.assertVecEqual(v_py, val1, ATOL, RTOL)
            self.assertVecEqual(w_py, val2, ATOL, RTOL)

            # add two vectors: v += w
            self.assertCall(lib.vector_add(v, w))
            val1 += val2
            self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
            self.assertCall(lib.vector_memcpy_av(w_ptr, w, 1))
            self.assertVecEqual(v_py, val1, ATOL, RTOL)
            self.assertVecEqual(w_py, val2, ATOL, RTOL)

            # subtract two vectors: w -= v
            self.assertCall(lib.vector_sub(w, v))
            val2 -= val1
            self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
            self.assertCall(lib.vector_memcpy_av(w_ptr, w, 1))
            self.assertVecEqual(v_py, val1, ATOL, RTOL)
            self.assertVecEqual(w_py, val2, ATOL, RTOL)

            # multiply two vectors: v *= w
            self.assertCall(lib.vector_mul(v, w))
            val1 *= val2
            self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
            self.assertCall(lib.vector_memcpy_av(w_ptr, w, 1))
            self.assertVecEqual(v_py, val1, ATOL, RTOL)
            self.assertVecEqual(w_py, val2, ATOL, RTOL)

            # vector scale
            scal = 3 * np.random.rand()
            val1 *= scal
            self.assertCall(lib.vector_scale(v, scal))
            self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
            self.assertVecEqual(v_py, val1, ATOL, RTOL)

            # make sure v is strictly positive (avoid division by ~0 below)
            val1 = 0.7 + np.random.rand()
            self.assertCall(lib.vector_scale(v, 0))
            self.assertCall(lib.vector_add_constant(v, val1))

            # divide two vectors: w /= v
            self.assertCall(lib.vector_div(w, v))
            val2 /= float(val1)
            self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
            self.assertCall(lib.vector_memcpy_av(w_ptr, w, 1))
            self.assertVecEqual(v_py, val1, ATOL, RTOL)
            self.assertVecEqual(w_py, val2, ATOL, RTOL)

            # make w strictly negative (so abs below is a real test)
            w_max = w_py.max()
            val2 -= (w_max + 1)
            self.assertCall(lib.vector_add_constant(w, -(w_max + 1)))

            # vector abs (also makes w positive for recip/sqrt/pow below)
            self.assertCall(lib.vector_abs(w))
            val2 = abs(val2)
            self.assertCall(lib.vector_memcpy_av(w_ptr, w, 1))
            self.assertVecEqual(w_py, val2, ATOL, RTOL)

            # vector recip
            self.assertCall(lib.vector_recip(w))
            val2 = 1. / val2
            self.assertCall(lib.vector_memcpy_av(w_ptr, w, 1))
            self.assertVecEqual(w_py, val2, ATOL, RTOL)

            # vector sqrt
            self.assertCall(lib.vector_sqrt(w))
            val2 **= 0.5
            self.assertCall(lib.vector_memcpy_av(w_ptr, w, 1))
            self.assertVecEqual(w_py, val2, ATOL, RTOL)

            # vector pow
            pow_val = -2 + 4 * np.random.rand()
            self.assertCall(lib.vector_pow(w, pow_val))
            val2 **= pow_val
            self.assertCall(lib.vector_memcpy_av(w_ptr, w, 1))
            self.assertVecEqual(w_py, val2, ATOL, RTOL)

            # vector exp
            self.assertCall(lib.vector_exp(w))
            val2 = np.exp(val2)
            self.assertCall(lib.vector_memcpy_av(w_ptr, w, 1))
            self.assertVecEqual(val2, w_py, ATOL, RTOL)

            # min / max: refill w with random data so extrema are distinct.
            w_py *= 0
            w_py += np.random.rand(len(w_py))
            self.assertCall(lib.vector_memcpy_va(w, w_ptr, 1))

            # vector argmin (result written through a size_t out-pointer)
            wargmin = np.zeros(1).astype(c_size_t)
            wargmin_p = wargmin.ctypes.data_as(lib.c_size_t_p)
            self.assertCall(lib.vector_indmin(w, wargmin_p))
            self.assertScalarEqual(w_py[wargmin[0]], w_py.min(), RTOL)

            # vector min
            wmin = np.zeros(1).astype(lib.pyfloat)
            wmin_p = wmin.ctypes.data_as(lib.ok_float_p)
            self.assertCall(lib.vector_min(w, wmin_p))
            self.assertScalarEqual(wmin[0], w_py.min(), RTOL)

            # vector max (reuses the min out-buffer)
            wmax = wmin
            wmax_p = wmin_p
            self.assertCall(lib.vector_max(w, wmax_p))
            self.assertScalarEqual(wmax[0], w_py.max(), RTOL)

            self.free_vars('v', 'w')
            self.assertCall(lib.ok_device_reset())

    def test_indvector_math(self):
        """indmin/min/max on the index-vector (integer) type."""
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            # val1/val2 are unused in this test (kept from the float-vector
            # test template above).
            val1 = 12 * np.random.rand()
            val2 = 5 * np.random.rand()
            len_v = 10 + int(1000 * np.random.rand())
            # len_v = 10 + int(10 * np.random.rand())
            DIGITS = 7 - 2 * lib.FLOAT - 1 * lib.GPU
            RTOL = 10**(-DIGITS)
            w, w_py, w_ptr = self.register_indvector(lib, len_v, 'w')

            # min / max: fill with random integer-valued data.
            w_py *= 0
            w_py += (30 * np.random.rand(len(w_py))).astype(w_py.dtype)
            self.assertCall(lib.indvector_memcpy_va(w, w_ptr, 1))

            # vector argmin
            wargmin = np.zeros(1).astype(c_size_t)
            wargmin_p = wargmin.ctypes.data_as(lib.c_size_t_p)
            self.assertCall(lib.indvector_indmin(w, wargmin_p))
            self.assertScalarEqual(w_py[wargmin[0]], w_py.min(), RTOL)

            # vector min (reuses the argmin out-buffer; both are size_t here)
            wmin = wargmin
            wmin_p = wargmin_p
            self.assertCall(lib.indvector_min(w, wmin_p))
            self.assertScalarEqual(wmin[0], w_py.min(), RTOL)

            # vector max
            wmax = wmin
            wmax_p = wmin_p
            self.assertCall(lib.indvector_max(w, wmax_p))
            self.assertScalarEqual(wmax[0], w_py.max(), RTOL)

            self.free_var('w')
            self.assertCall(lib.ok_device_reset())
class MatrixTestCase(OptkitCTestCase):
    """Exercise the dense-linsys C library's ``matrix`` type via ctypes.

    Every test loops over each available (gpu, single_precision) library
    build in ``self.CONDITIONS`` (skipping missing builds) and over both
    CBLAS storage orders.
    """

    @classmethod
    def setUpClass(self):
        # Force loading of locally built C libraries; restored in
        # tearDownClass.
        self.env_orig = os.getenv('OPTKIT_USE_LOCALLIBS', '0')
        os.environ['OPTKIT_USE_LOCALLIBS'] = '1'
        self.libs = DenseLinsysLibs()
        self.A_test = self.A_test_gen

    @classmethod
    def tearDownClass(self):
        # Restore the caller's OPTKIT_USE_LOCALLIBS setting.
        os.environ['OPTKIT_USE_LOCALLIBS'] = self.env_orig

    def setUp(self):
        pass

    def tearDown(self):
        # Release any C-side allocations registered during the test, then run
        # deferred exit calls (e.g. device resets).
        self.free_all_vars()
        self.exit_call()

    def test_alloc(self):
        """matrix_calloc / matrix_free: dims, leading dim, order, zeroing."""
        (m, n) = self.shape
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
                A = lib.matrix(0, 0, 0, None, order)
                # Uninitialized struct: zero dims and leading dimension.
                self.assertEqual( A.size1, 0 )
                self.assertEqual( A.size2, 0 )
                self.assertEqual( A.ld, 0 )
                self.assertEqual( A.order, order )

                # calloc
                self.assertCall( lib.matrix_calloc(A, m, n, order) )
                self.register_var('A', A, lib.matrix_free)
                self.assertEqual( A.size1, m )
                self.assertEqual( A.size2, n )
                # Leading dimension follows the storage order.
                if order == lib.enums.CblasRowMajor:
                    self.assertEqual( A.ld, n )
                else:
                    self.assertEqual( A.ld, m )
                self.assertEqual( A.order, order )

                # Only CPU builds expose the raw buffer; calloc'd memory must
                # read back as zeros.
                if not gpu:
                    for i in range(m * n):
                        self.assertEqual( A.data[i], 0 )

                # free
                self.free_var('A')
            self.assertCall( lib.ok_device_reset() )

    def test_io(self):
        """Host<->device transfer: memcpy_am/ma/mm, view_array, set_all."""
        (m, n) = self.shape
        # Work on a copy: this test mutates A_rand in place (set_all section),
        # and self.A_test is shared by the other tests of this class.
        A_rand = self.A_test.copy()
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 11 - 4 * lib.FLOAT - 1 * lib.GPU
            TOL = 10**(-DIGITS)
            for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
                A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')

                # memcpy_am: device -> host. A is calloc'd (all zeros), so
                # copying it out must overwrite A_py's random data with zeros.
                A_py += A_rand
                self.assertCall( lib.matrix_memcpy_am(A_ptr, A, order) )
                for i in range(m):
                    for j in range(n):
                        self.assertEqual( A_py[i, j], 0 )

                # memcpy_ma: host -> device, then round-trip back.
                A_py += A_rand
                self.assertCall( lib.matrix_memcpy_ma(A, A_ptr, order) )
                A_py *= 0
                self.assertCall( lib.matrix_memcpy_am(A_ptr, A, order) )
                self.assertVecEqual( A_py, A_rand, TOL, TOL )

                # memcpy_mm: device -> device copy.
                Z, Z_py, Z_ptr = self.register_matrix(lib, m, n, order, 'Z')
                self.assertCall( lib.matrix_memcpy_mm(Z, A, order) )
                self.assertCall( lib.matrix_memcpy_am(Z_ptr, Z, order) )
                self.assertVecEqual( Z_py, A_py, TOL, TOL )

                # view_array: wrap a python-owned buffer (CPU builds only).
                if not gpu:
                    A_py *= 0
                    # The viewed buffer must match the library's float type
                    # and the requested storage order (the original passed
                    # A_rand directly: float64/C-order regardless of build).
                    pyorder = 'C' if order == lib.enums.CblasRowMajor else 'F'
                    A_view = np.asarray(A_rand, dtype=lib.pyfloat,
                                        order=pyorder)
                    B = lib.matrix(0, 0, 0, None, order)
                    self.assertCall( lib.matrix_view_array(
                            B, A_view.ctypes.data_as(lib.ok_float_p), m, n,
                            order) )
                    # BUGFIX: copy out of B (the view) rather than A, so the
                    # view itself is what gets verified.
                    self.assertCall( lib.matrix_memcpy_am(A_ptr, B, order) )
                    self.assertVecEqual( A_py, A_view, TOL, TOL )
                    # DON'T FREE B, DATA OWNED BY PYTHON

                # set_all: fill device matrix with a constant.
                val = 2
                A_rand *= 0
                A_rand += val
                self.assertCall( lib.matrix_set_all(A, val) )
                self.assertCall( lib.matrix_memcpy_am(A_ptr, A, order) )
                self.assertVecEqual( A_py, A_rand, TOL, TOL )

                self.free_vars('A', 'Z')
            self.assertCall( lib.ok_device_reset() )

    def test_slicing(self):
        """ matrix slicing tests: submatrix, row, column, diagonal views """
        (m, n) = self.shape
        A_rand = self.A_test
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 7 - 2 * lib.FLOAT - 1 * lib.GPU
            TOL = 10**(-DIGITS)
            for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
                pyorder = 'C' if order == lib.enums.CblasRowMajor else 'F'
                A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')

                # set A, A_py to A_rand
                A_py += A_rand
                self.assertCall( lib.matrix_memcpy_ma(A, A_ptr, order) )

                # submatrix (floor division: indices must be integers under
                # both classic and true division)
                m0 = m // 4
                n0 = n // 4
                msub = m // 2
                nsub = n // 2
                Asub = lib.matrix(0, 0, 0, None, order)
                Asub_py = np.zeros(
                        (msub, nsub), order=pyorder).astype(lib.pyfloat)
                Asub_ptr = Asub_py.ctypes.data_as(lib.ok_float_p)
                self.assertCall(
                        lib.matrix_submatrix(Asub, A, m0, n0, msub, nsub) )
                self.assertCall( lib.matrix_memcpy_am(Asub_ptr, Asub, order) )
                A_py_sub = A_py[m0 : m0 + msub, n0 : n0 + nsub]
                self.assertVecEqual( Asub_py, A_py_sub, TOL, TOL )

                # row
                v = lib.vector(0, 0, None)
                v_py = np.zeros(n).astype(lib.pyfloat)
                v_ptr = v_py.ctypes.data_as(lib.ok_float_p)
                self.assertCall( lib.matrix_row(v, A, m0) )
                self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
                self.assertVecEqual( A_py[m0, :], v_py, TOL, TOL )

                # column
                v_py = np.zeros(m).astype(lib.pyfloat)
                v_ptr = v_py.ctypes.data_as(lib.ok_float_p)
                self.assertCall( lib.matrix_column(v, A, n0) )
                self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
                self.assertVecEqual( A_py[:, n0], v_py, TOL, TOL )

                # diagonal
                v_py = np.zeros(min(m, n)).astype(lib.pyfloat)
                v_ptr = v_py.ctypes.data_as(lib.ok_float_p)
                self.assertCall( lib.matrix_diagonal(v, A) )
                self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
                self.assertVecEqual( np.diag(A_py), v_py, TOL, TOL )

                self.free_var('A')
            self.assertCall( lib.ok_device_reset() )

    def test_math(self):
        """ matrix math tests: scale, scale_left/right, abs, pow.

        A_rand mirrors, on the host, every in-place transform applied to the
        device matrix, so each op is verified against numpy.
        """
        (m, n) = self.shape
        # Work on a copy: this test repeatedly mutates A_rand in place and
        # self.A_test is shared across the tests of this class.
        A_rand = self.A_test.copy()
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 7 - 2 * lib.FLOAT - 1 * lib.GPU
            RTOL = 10**(-DIGITS)
            ATOLMN = RTOL * (m * n)**0.5
            for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
                A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')

                # set A, A_py to A_rand
                A_py += A_rand
                self.assertCall( lib.matrix_memcpy_ma(A, A_ptr, order) )

                # scale: A = alpha * A
                alpha = np.random.rand()
                A_rand *= alpha
                self.assertCall( lib.matrix_scale(A, alpha) )
                self.assertCall( lib.matrix_memcpy_am(A_ptr, A, order) )
                self.assertVecEqual( A_py, A_rand, ATOLMN, RTOL )

                # scale_left: A = diag(d) * A
                d, d_py, d_ptr = self.register_vector(lib, m, 'd')
                d_py[:] = np.random.rand(m)
                for i in range(m):
                    A_rand[i, :] *= d_py[i]
                self.assertCall( lib.vector_memcpy_va(d, d_ptr, 1) )
                self.assertCall( lib.matrix_scale_left(A, d) )
                self.assertCall( lib.matrix_memcpy_am(A_ptr, A, order) )
                self.assertVecEqual( A_py, A_rand, ATOLMN, RTOL )

                # scale_right: A = A * diag(e)
                e, e_py, e_ptr = self.register_vector(lib, n, 'e')
                e_py[:] = np.random.rand(n)
                for j in range(n):
                    A_rand[:, j] *= e_py[j]
                self.assertCall( lib.vector_memcpy_va(e, e_ptr, 1) )
                self.assertCall( lib.matrix_scale_right(A, e) )
                self.assertCall( lib.matrix_memcpy_am(A_ptr, A, order) )
                self.assertVecEqual( A_py, A_rand, ATOLMN, RTOL )

                # abs: A_ij = abs(A_ij); recenter first so some entries are
                # negative and abs is a real test.
                A_rand -= (A_rand.max() - A_rand.min()) / 2
                A_py *= 0
                A_py += A_rand
                A_rand = np.abs(A_rand)
                self.assertCall( lib.matrix_memcpy_ma(A, A_ptr, order) )
                self.assertCall( lib.matrix_abs(A) )
                self.assertCall( lib.matrix_memcpy_am(A_ptr, A, order) )
                self.assertVecEqual( A_py, A_rand, ATOLMN, RTOL )

                # pow (entries are nonnegative after abs)
                p = 3 * np.random.rand()
                A_rand **= p
                self.assertCall( lib.matrix_pow(A, p) )
                self.assertCall( lib.matrix_memcpy_am(A_ptr, A, order) )
                self.assertVecEqual( A_py, A_rand, ATOLMN, RTOL )

                self.free_vars('d', 'e', 'A')
            self.assertCall( lib.ok_device_reset() )
class VectorTestCase(OptkitCTestCase):
    """Exercise the dense-linsys C library's ``vector`` type via ctypes.

    Every test loops over each available (gpu, single_precision) library
    build listed in ``self.CONDITIONS``, skipping builds that are not
    compiled/installed (``lib is None``).

    NOTE(review): this redefines a VectorTestCase declared earlier in the
    module; this later definition is the one unittest actually runs —
    confirm the duplication is intentional.
    """

    @classmethod
    def setUpClass(self):
        # Force loading of locally built C libraries; restored in
        # tearDownClass.
        self.env_orig = os.getenv('OPTKIT_USE_LOCALLIBS', '0')
        os.environ['OPTKIT_USE_LOCALLIBS'] = '1'
        self.libs = DenseLinsysLibs()

    @classmethod
    def tearDownClass(self):
        # Restore the caller's OPTKIT_USE_LOCALLIBS setting.
        os.environ['OPTKIT_USE_LOCALLIBS'] = self.env_orig

    def tearDown(self):
        # Release any C-side allocations registered during the test, then run
        # deferred exit calls (e.g. device resets).
        self.free_all_vars()
        self.exit_call()

    def test_alloc(self):
        """vector_calloc / vector_free: size and stride bookkeeping."""
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            len_v = 10 + int(10 * np.random.rand())
            v = lib.vector(0, 0, None)
            # Uninitialized struct: zero size and stride.
            self.assertEqual( v.size, 0 )
            self.assertEqual( v.stride, 0 )

            self.assertCall( lib.vector_calloc(v, len_v) )
            self.register_var('v', v, lib.vector_free)
            self.assertEqual( v.size, len_v )
            self.assertEqual( v.stride, 1 )

            # Only CPU builds expose the raw buffer; calloc'd memory must
            # read back as zeros.
            if not gpu:
                for i in xrange(v.size):
                    self.assertEqual( v.data[i], 0 )

            # Freeing resets the struct's metadata.
            self.free_var('v')
            self.assertEqual( v.size, 0 )
            self.assertEqual( v.stride, 0 )

    def test_io(self):
        """Host<->device transfer: set_all, memcpy_av/va/vv, view_array."""
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            len_v = 10 + int(1000 * np.random.rand())
            # bool arithmetic: 7 digits for double, 5 for single precision.
            DIGITS = 7 - 2 * single_precision
            RTOL = 10**(-DIGITS)
            ATOL = RTOL * len_v**0.5
            v, v_py, v_ptr = self.register_vector(lib, len_v, 'v')
            w, w_py, w_ptr = self.register_vector(lib, len_v, 'w')

            # set_all: fill device vector with a constant.
            set_val = 5
            self.assertCall( lib.vector_set_all(v, set_val) )
            # memcpy_av: device -> host array.
            self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
            for i in xrange(len_v):
                self.assertEqual(v_py[i], set_val)

            # memcpy_va: host array -> device, then round-trip back.
            w_rand = np.random.rand(len_v)
            w_py[:] = w_rand[:]
            self.assertCall( lib.vector_memcpy_va(w, w_ptr, 1) )
            w_py *= 0
            self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
            self.assertVecEqual( w_py, w_rand, ATOL, RTOL )

            # memcpy_vv: device -> device copy.
            self.assertCall( lib.vector_memcpy_vv(v, w) )
            self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
            self.assertVecEqual( v_py, w_rand, ATOL, RTOL )

            # view_array: wrap a python-owned buffer (CPU builds only).
            if not gpu:
                u_rand = np.random.rand(len_v).astype(lib.pyfloat)
                u_ptr = u_rand.ctypes.data_as(lib.ok_float_p)
                u = lib.vector(0, 0, None)
                self.assertCall( lib.vector_view_array(u, u_ptr,
                                                       u_rand.size) )
                self.assertCall( lib.vector_memcpy_av(v_ptr, u, 1) )
                self.assertVecEqual( v_py, u_rand, ATOL, RTOL )
                # DON'T FREE u, DATA OWNED BY PYTHON

            self.free_vars('v', 'w')
            self.assertCall( lib.ok_device_reset() )

    def test_subvector(self):
        """vector_subvector: a view sharing data with the parent vector."""
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            len_v = 10 + int(10 * np.random.rand())
            v, v_py, v_ptr = self.register_vector(lib, len_v, 'v')

            offset_sub = 3
            len_sub = 3
            v_sub = lib.vector(0, 0, None)
            self.assertCall( lib.vector_subvector(v_sub, v, offset_sub,
                                                  len_sub) )
            self.assertEqual( v_sub.size, 3 )
            self.assertEqual( v_sub.stride, v.stride )

            # Copy both parent and view to host; the view must match the
            # corresponding slice of the parent.
            v_sub_py = np.zeros(len_sub).astype(lib.pyfloat)
            v_sub_ptr = v_sub_py.ctypes.data_as(lib.ok_float_p)
            self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
            self.assertCall( lib.vector_memcpy_av(v_sub_ptr, v_sub, 1) )
            for i in xrange(len_sub):
                self.assertEqual( v_sub_py[i], v_py[i + offset_sub] )

            self.free_var('v')
            self.assertCall( lib.ok_device_reset() )

    def test_math(self):
        """Elementwise vector ops, tracked against scalar bookkeeping.

        v and w start at zero; every element of each vector always holds the
        same value, mirrored on the host in val1 (for v) and val2 (for w),
        so each op can be checked against a scalar. Statement order matters:
        each assertion depends on the device state left by the previous call.
        """
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            val1 = 12 * np.random.rand()
            val2 = 5 * np.random.rand()
            len_v = 10 + int(1000 * np.random.rand())
            # len_v = 10 + int(10 * np.random.rand())
            DIGITS = 7 - 2 * lib.FLOAT - 1 * lib.GPU
            RTOL = 10**(-DIGITS)
            ATOL = RTOL * len_v**0.5
            v, v_py, v_ptr = self.register_vector(lib, len_v, 'v')
            w, w_py, w_ptr = self.register_vector(lib, len_v, 'w')

            # constant addition
            self.assertCall( lib.vector_add_constant(v, val1) )
            self.assertCall( lib.vector_add_constant(w, val2) )
            self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
            self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
            self.assertVecEqual( v_py, val1, ATOL, RTOL )
            self.assertVecEqual( w_py, val2, ATOL, RTOL )

            # add two vectors: v += w
            self.assertCall( lib.vector_add(v, w) )
            val1 += val2
            self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
            self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
            self.assertVecEqual( v_py, val1, ATOL, RTOL )
            self.assertVecEqual( w_py, val2, ATOL, RTOL )

            # subtract two vectors: w -= v
            self.assertCall( lib.vector_sub(w, v) )
            val2 -= val1
            self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
            self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
            self.assertVecEqual( v_py, val1, ATOL, RTOL )
            self.assertVecEqual( w_py, val2, ATOL, RTOL )

            # multiply two vectors: v *= w
            self.assertCall( lib.vector_mul(v, w) )
            val1 *= val2
            self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
            self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
            self.assertVecEqual( v_py, val1, ATOL, RTOL )
            self.assertVecEqual( w_py, val2, ATOL, RTOL )

            # vector scale
            scal = 3 * np.random.rand()
            val1 *= scal
            self.assertCall( lib.vector_scale(v, scal) )
            self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
            self.assertVecEqual( v_py, val1, ATOL, RTOL )

            # make sure v is strictly positive (avoid division by ~0 below)
            val1 = 0.7 + np.random.rand()
            self.assertCall( lib.vector_scale(v, 0) )
            self.assertCall( lib.vector_add_constant(v, val1) )

            # divide two vectors: w /= v
            self.assertCall( lib.vector_div(w, v) )
            val2 /= float(val1)
            self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
            self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
            self.assertVecEqual( v_py, val1, ATOL, RTOL )
            self.assertVecEqual( w_py, val2, ATOL, RTOL )

            # make w strictly negative (so abs below is a real test)
            w_max = w_py.max()
            val2 -= (w_max + 1)
            self.assertCall( lib.vector_add_constant(w, -(w_max + 1)) )

            # vector abs (also makes w positive for recip/sqrt/pow below)
            self.assertCall( lib.vector_abs(w) )
            val2 = abs(val2)
            self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
            self.assertVecEqual( w_py, val2, ATOL, RTOL )

            # vector recip
            self.assertCall( lib.vector_recip(w) )
            val2 = 1. / val2
            self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
            self.assertVecEqual( w_py, val2, ATOL, RTOL )

            # vector sqrt
            self.assertCall( lib.vector_sqrt(w) )
            val2 **= 0.5
            self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
            self.assertVecEqual( w_py, val2, ATOL, RTOL )

            # vector pow
            pow_val = -2 + 4 * np.random.rand()
            self.assertCall( lib.vector_pow(w, pow_val) )
            val2 **= pow_val
            self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
            self.assertVecEqual( w_py, val2, ATOL, RTOL )

            # vector exp
            self.assertCall( lib.vector_exp(w) )
            val2 = np.exp(val2)
            self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
            self.assertVecEqual( val2, w_py, ATOL, RTOL )

            # min / max: refill w with random data so extrema are distinct.
            w_py *= 0
            w_py += np.random.rand(len(w_py))
            self.assertCall( lib.vector_memcpy_va(w, w_ptr, 1) )

            # vector argmin (result written through a size_t out-pointer)
            wargmin = np.zeros(1).astype(c_size_t)
            wargmin_p = wargmin.ctypes.data_as(lib.c_size_t_p)
            self.assertCall( lib.vector_indmin(w, wargmin_p) )
            self.assertScalarEqual( w_py[wargmin[0]], w_py.min(), RTOL )

            # vector min
            wmin = np.zeros(1).astype(lib.pyfloat)
            wmin_p = wmin.ctypes.data_as(lib.ok_float_p)
            self.assertCall( lib.vector_min(w, wmin_p) )
            self.assertScalarEqual( wmin[0], w_py.min(), RTOL )

            # vector max (reuses the min out-buffer)
            wmax = wmin
            wmax_p = wmin_p
            self.assertCall( lib.vector_max(w, wmax_p) )
            self.assertScalarEqual( wmax[0], w_py.max(), RTOL )

            self.free_vars('v', 'w')
            self.assertCall( lib.ok_device_reset() )

    def test_indvector_math(self):
        """indmin/min/max on the index-vector (integer) type."""
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            # val1/val2 are unused in this test (kept from the float-vector
            # test template above).
            val1 = 12 * np.random.rand()
            val2 = 5 * np.random.rand()
            len_v = 10 + int(1000 * np.random.rand())
            # len_v = 10 + int(10 * np.random.rand())
            DIGITS = 7 - 2 * lib.FLOAT - 1 * lib.GPU
            RTOL = 10**(-DIGITS)
            w, w_py, w_ptr = self.register_indvector(lib, len_v, 'w')

            # min / max: fill with random integer-valued data.
            w_py *= 0
            w_py += (30 * np.random.rand(len(w_py))).astype(w_py.dtype)
            self.assertCall( lib.indvector_memcpy_va(w, w_ptr, 1) )

            # vector argmin
            wargmin = np.zeros(1).astype(c_size_t)
            wargmin_p = wargmin.ctypes.data_as(lib.c_size_t_p)
            self.assertCall( lib.indvector_indmin(w, wargmin_p) )
            self.assertScalarEqual( w_py[wargmin[0]], w_py.min(), RTOL )

            # vector min (reuses the argmin out-buffer; both are size_t here)
            wmin = wargmin
            wmin_p = wargmin_p
            self.assertCall( lib.indvector_min(w, wmin_p) )
            self.assertScalarEqual( wmin[0], w_py.min(), RTOL )

            # vector max
            wmax = wmin
            wmax_p = wmin_p
            self.assertCall( lib.indvector_max(w, wmax_p) )
            self.assertScalarEqual( wmax[0], w_py.max(), RTOL )

            self.free_var('w')
            self.assertCall( lib.ok_device_reset() )