Example #1
0
	def setUpClass(self):
		self.env_orig = os.getenv('OPTKIT_USE_LOCALLIBS', '0')
		os.environ['OPTKIT_USE_LOCALLIBS'] = '1'
		self.libs = DenseLinsysLibs()
		self.A_test = self.A_test_gen
Example #2
0
class DenseLibsTestCase(OptkitCTestCase):
	"""Sanity checks for loading the dense linear-system C libraries.

	Verifies that at least one (gpu, precision) build loads, that each
	loaded library exposes the expected ctypes type aliases, and that
	BLAS handle creation, device reset, and version queries succeed.
	"""

	@classmethod
	def setUpClass(cls):
		"""Force locally built libraries; restored in tearDownClass."""
		# first argument renamed self -> cls per classmethod convention
		cls.env_orig = os.getenv('OPTKIT_USE_LOCALLIBS', '0')
		os.environ['OPTKIT_USE_LOCALLIBS'] = '1'
		cls.libs = DenseLinsysLibs()

	@classmethod
	def tearDownClass(cls):
		"""Restore the OPTKIT_USE_LOCALLIBS environment setting."""
		os.environ['OPTKIT_USE_LOCALLIBS'] = cls.env_orig

	def test_libs_exist(self):
		"""At least one (gpu, precision) library build must load."""
		libs = []
		for (gpu, single_precision) in self.CONDITIONS:
			libs.append(self.libs.get(
					single_precision=single_precision, gpu=gpu))
		self.assertTrue( any(libs) )

	def test_lib_types(self):
		"""Each loaded library exposes the expected ctypes type aliases."""
		expected_attrs = ('ok_float', 'ok_int', 'c_int_p', 'ok_float_p',
						  'ok_int_p', 'vector', 'vector_p', 'matrix',
						  'matrix_p')
		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue

			for attr in expected_attrs:
				# assertIn gives a clearer failure message than
				# assertTrue(attr in dir(lib))
				self.assertIn( attr, dir(lib) )
			# ok_float is c_float exactly when built in single precision
			self.assertEqual( single_precision, lib.ok_float == c_float )
			self.assertEqual( lib.ok_int, c_int )

	def test_blas_handle(self):
		"""A BLAS handle can be created and destroyed without error."""
		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue

			handle = c_void_p()
			# create
			self.assertCall( lib.blas_make_handle(byref(handle)) )
			# destroy
			self.assertCall( lib.blas_destroy_handle(handle) )

	def test_device_reset(self):
		"""Device reset succeeds, including after allocate/deallocate."""
		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue

			# reset
			self.assertCall( lib.ok_device_reset() )

			# allocate - deallocate - reset
			handle = c_void_p()
			self.assertCall( lib.blas_make_handle(byref(handle)) )
			self.assertCall( lib.blas_destroy_handle(handle) )
			self.assertCall( lib.ok_device_reset() )

	def test_version(self):
		"""Compiled library reports a nonzero semantic version."""
		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue

			major = c_int()
			minor = c_int()
			change = c_int()
			status = c_int()

			lib.optkit_version(byref(major), byref(minor), byref(change),
							   byref(status))

			version = self.version_string(major.value, minor.value,
										  change.value, status.value)

			self.assertNotEqual( version, '0.0.0' )
			if self.VERBOSE_TEST:
				print("denselib version", version)
Example #3
0
class DenseBLASTestCase(OptkitCTestCase):
	"""Tests for the dense BLAS (level 1-3) wrappers in the optkit C libs.

	Each test loads every available (gpu, precision) library build,
	mirrors the computation in NumPy, and checks the C result against
	the NumPy reference within a precision-dependent tolerance.
	NOTE(review): uses ``xrange``, so this module targets Python 2.
	"""

	@classmethod
	def setUpClass(self):
		# NOTE(review): classmethod arg named 'self'; conventionally 'cls'.
		# Force locally built libraries; restored in tearDownClass.
		self.env_orig = os.getenv('OPTKIT_USE_LOCALLIBS', '0')
		os.environ['OPTKIT_USE_LOCALLIBS'] = '1'
		self.libs = DenseLinsysLibs()
		# A_test_gen: fixture matrix from the base class — presumably a
		# random (m, n) dense matrix; confirm in OptkitCTestCase.
		self.A_test = self.A_test_gen

	@classmethod
	def tearDownClass(self):
		os.environ['OPTKIT_USE_LOCALLIBS'] = self.env_orig

	def setUp(self):
		# No per-test setup; all fixtures are class-level.
		pass

	def tearDown(self):
		# Release any C-side allocations registered during the test and
		# run deferred exit callbacks (e.g. device resets).
		self.free_all_vars()
		self.exit_call()

	def test_blas1_dot(self):
		"""blas_dot(v, w) matches the NumPy dot product."""
		(m, n) = self.shape

		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			# single precision carries fewer reliable digits
			DIGITS = 7 - 2 * single_precision
			TOL = 10**(-DIGITS)

			hdl = self.register_blas_handle(lib, 'hdl')
			v, v_py, v_ptr = self.register_vector(lib, m, 'v')
			w, w_py, w_ptr = self.register_vector(lib, m, 'w')

			v_py += np.random.rand(m)
			w_py += np.random.rand(m)
			lib.vector_memcpy_va(v, v_ptr, 1)
			lib.vector_memcpy_va(w, w_ptr, 1)

			answer = np.zeros(1).astype(lib.pyfloat)
			answer_p = answer.ctypes.data_as(lib.ok_float_p)
			self.assertCall( lib.blas_dot(hdl, v, w, answer_p) )
			self.assertTrue( np.abs(answer[0] - v_py.dot(w_py)) <=
							 TOL + TOL * answer[0] )

			self.free_vars('v', 'w', 'hdl')
			self.assertCall( lib.ok_device_reset() )

	def test_blas1_nrm2(self):
		"""blas_nrm2(v) matches np.linalg.norm(v)."""
		(m, n) = self.shape

		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			DIGITS = 7 - 2 * single_precision
			TOL = 10**(-DIGITS)

			hdl = self.register_blas_handle(lib, 'hdl')
			v, v_py, v_ptr = self.register_vector(lib, m, 'v')

			v_py += np.random.rand(m)
			lib.vector_memcpy_va(v, v_ptr, 1)

			answer = np.zeros(1).astype(lib.pyfloat)
			answer_p = answer.ctypes.data_as(lib.ok_float_p)
			self.assertCall( lib.blas_nrm2(hdl, v, answer_p) )
			self.assertScalarEqual( answer[0], np.linalg.norm(v_py), TOL )

			self.free_vars('v', 'hdl')
			self.assertCall( lib.ok_device_reset() )

	def test_blas1_asum(self):
		"""blas_asum(v) matches the 1-norm of v."""
		(m, n) = self.shape

		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			DIGITS = 7 - 2 * single_precision
			TOL = 10**(-DIGITS)

			hdl = self.register_blas_handle(lib, 'hdl')
			v, v_py, v_ptr = self.register_vector(lib, m, 'v')

			v_py += np.random.rand(m)
			self.assertCall( lib.vector_memcpy_va(v, v_ptr, 1) )

			answer = np.zeros(1).astype(lib.pyfloat)
			answer_p = answer.ctypes.data_as(lib.ok_float_p)
			self.assertCall( lib.blas_asum(hdl, v, answer_p) )
			self.assertScalarEqual( answer[0], np.linalg.norm(v_py, 1), TOL )

			self.free_vars('v', 'hdl')
			self.assertCall( lib.ok_device_reset() )

	def test_blas1_scal(self):
		"""blas_scal scales a vector in place: v <- alpha * v."""
		(m, n) = self.shape
		v_rand= np.random.rand(m)

		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			DIGITS = 7 - 2 * single_precision
			TOL = 10**(-DIGITS)

			hdl = self.register_blas_handle(lib, 'hdl')
			v, v_py, v_ptr = self.register_vector(lib, m, 'v')

			v_py += v_rand
			self.assertCall( lib.vector_memcpy_va(v, v_ptr, 1) )

			alpha = np.random.rand()
			self.assertCall( lib.blas_scal(hdl, alpha, v) )
			self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
			self.assertVecEqual( v_py, alpha * v_rand, TOL * m**0.5, TOL )

			self.free_vars('v', 'hdl')
			self.assertCall( lib.ok_device_reset() )

	def test_blas1_axpy(self):
		"""blas_axpy computes w <- alpha * v + w."""
		(m, n) = self.shape

		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			DIGITS = 7 - 2 * single_precision
			TOL = 10**(-DIGITS)

			hdl = self.register_blas_handle(lib, 'hdl')
			v, v_py, v_ptr = self.register_vector(lib, m, 'v')
			w, w_py, w_ptr = self.register_vector(lib, m, 'w')

			v_py += np.random.rand(m)
			w_py += np.random.rand(m)
			alpha = np.random.rand()
			pyresult = alpha * v_py + w_py
			self.assertCall( lib.vector_memcpy_va(v, v_ptr, 1) )
			self.assertCall( lib.vector_memcpy_va(w, w_ptr, 1) )
			self.assertCall( lib.blas_axpy(hdl, alpha, v, w) )
			self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
			self.assertVecEqual( w_py, pyresult, TOL * m**0.5, TOL )

			self.free_vars('v', 'w', 'hdl')
			self.assertCall( lib.ok_device_reset() )

	def test_blas2_gemv(self):
		"""blas_gemv computes y = alpha*A*x + beta*y and the transpose."""
		(m, n) = self.shape

		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			DIGITS = 7 - 2 * single_precision
			TOL = 10**(-DIGITS)

			for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
				hdl = self.register_blas_handle(lib, 'hdl')

				# make A, x, y
				A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')
				x, x_py, x_ptr = self.register_vector(lib, n, 'x')
				y, y_py, y_ptr = self.register_vector(lib, m, 'y')

				# populate A, x, y (in Py and C)
				A_py += self.A_test
				x_py += np.random.rand(n)
				y_py += np.random.rand(m)
				self.assertCall( lib.vector_memcpy_va(x, x_ptr, 1) )
				self.assertCall( lib.vector_memcpy_va(y, y_ptr, 1) )
				self.assertCall( lib.matrix_memcpy_ma(A, A_ptr, order) )

				# perform y = alpha * A * x + beta *  y
				alpha = -0.5 + np.random.rand()
				beta = -0.5 + np.random.rand()

				pyresult = alpha * A_py.dot(x_py) + beta * y_py
				self.assertCall( lib.blas_gemv(hdl, lib.enums.CblasNoTrans,
								 alpha, A, x, beta, y) )
				self.assertCall( lib.vector_memcpy_av(y_ptr, y, 1) )
				self.assertVecEqual( y_py, pyresult, TOL * m**0.5, TOL )

				# perform x = alpha * A' * y + beta * x
				y_py[:] = pyresult[:]
				pyresult = alpha * A_py.T.dot(y_py) + beta * x_py
				self.assertCall( lib.blas_gemv(hdl, lib.enums.CblasTrans,
								 alpha, A, y, beta, x) )
				self.assertCall( lib.vector_memcpy_av(x_ptr, x, 1) )
				self.assertVecEqual( x_py, pyresult, TOL * n**0.5, TOL )

				self.free_vars('A', 'x', 'y', 'hdl')
				self.assertCall( lib.ok_device_reset() )

	def test_blas2_trsv(self):
		"""blas_trsv solves L x = b for a lower-triangular L in place."""
		(m, n) = self.shape

		# generate lower triangular matrix L
		L_test = self.A_test.T.dot(self.A_test)

		# normalize L so inversion doesn't blow up
		L_test /= np.linalg.norm(L_test)

		for i in xrange(n):
			# diagonal entries ~ 1 to keep condition number reasonable
			L_test[i, i] /= 10**np.log(n)
			L_test[i, i] += 1
			# upper triangle = 0
			for j in xrange(n):
				if j > i:
					L_test[i, j] *= 0

		x_rand = np.random.rand(n)

		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			DIGITS = 7 - 2 * single_precision
			TOL = 10**(-DIGITS)

			for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
				hdl = self.register_blas_handle(lib, 'hdl')

				# make L, x
				L, L_py, L_ptr = self.register_matrix(lib, n, n, order, 'L')
				x, x_py, x_ptr = self.register_vector(lib, n, 'x')

				# populate L, x
				L_py += L_test
				x_py += x_rand
				self.assertCall( lib.vector_memcpy_va(x, x_ptr, 1) )
				self.assertCall( lib.matrix_memcpy_ma(L, L_ptr, order) )

				# y = inv(L) * x
				pyresult = np.linalg.solve(L_test, x_rand)
				self.assertCall( lib.blas_trsv(hdl, lib.enums.CblasLower,
							  lib.enums.CblasNoTrans, lib.enums.CblasNonUnit,
							  L, x) )
				self.assertCall( lib.vector_memcpy_av(x_ptr, x, 1) )
				self.assertVecEqual( x_py, pyresult, TOL * n**0.5, TOL )

				self.free_vars('L', 'x', 'hdl')
				self.assertCall( lib.ok_device_reset() )

	def test_blas2_sbmv(self):
		"""blas_sbmv: symmetric banded matrix-vector multiply vs. a
		hand-rolled NumPy expansion of the band storage."""
		(m, n) = self.shape
		diags = max(1, min(4, min(m, n) - 1))

		s_test = np.random.rand(n * diags)
		x_rand = np.random.rand(n)
		y_rand = np.random.rand(n)

		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			DIGITS = 7 - 2 * single_precision
			TOL = 10**(-DIGITS)

			hdl = self.register_blas_handle(lib, 'hdl')

			# make symmetric banded "matrix" S stored as vector s,
			# and vectors x, y
			s, s_py, s_ptr = self.register_vector(lib, n * diags, 's')
			x, x_py, x_ptr = self.register_vector(lib, n, 'x')
			y, y_py, y_ptr = self.register_vector(lib, n, 'y')

			# populate vectors
			s_py += s_test
			x_py += x_rand
			y_py += y_rand
			self.assertCall( lib.vector_memcpy_va(s, s_ptr, 1) )
			self.assertCall( lib.vector_memcpy_va(x, x_ptr, 1) )
			self.assertCall( lib.vector_memcpy_va(y, y_ptr, 1) )

			# y = alpha
			alpha = np.random.rand()
			beta = np.random.rand()
			pyresult = np.zeros(n)
			for d in xrange(diags):
				for j in xrange(n - d):
					if d > 0:
						pyresult[d + j] += s_test[d + diags * j] * x_rand[j]
					pyresult[j] += s_test[d + diags * j] * x_rand[d + j]
			pyresult *= alpha
			pyresult += beta * y_py

			self.assertCall( lib.blas_sbmv(hdl, lib.enums.CblasColMajor,
							 lib.enums.CblasLower, diags - 1, alpha, s, x,
							 beta, y) )
			self.assertCall( lib.vector_memcpy_av(y_ptr, y, 1) )
			self.assertVecEqual( y_py, pyresult, TOL * m**0.5, TOL )

			self.free_vars('x', 'y', 's', 'hdl')
			self.assertCall( lib.ok_device_reset() )


	def test_diagmv(self):
		"""blas_diagmv: y = alpha * diag(d) * x + beta * y."""
		(m, n) = self.shape

		d_test = np.random.rand(n)
		x_rand = np.random.rand(n)
		y_rand = np.random.rand(n)

		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			DIGITS = 7 - 2 * single_precision
			TOL = 10**(-DIGITS)

			hdl = self.register_blas_handle(lib, 'hdl')

			# make diagonal "matrix" D stored as vector d,
			# and vectors x, y
			d, d_py, d_ptr = self.register_vector(lib, n, 'd')
			x, x_py, x_ptr = self.register_vector(lib, n, 'x')
			y, y_py, y_ptr = self.register_vector(lib, n, 'y')

			# populate vectors
			d_py += d_test
			x_py += x_rand
			y_py += 2
			self.assertCall( lib.vector_memcpy_va(d, d_ptr, 1) )
			self.assertCall( lib.vector_memcpy_va(x, x_ptr, 1) )
			self.assertCall( lib.vector_memcpy_va(y, y_ptr, 1) )

			# y = alpha * D * x + beta * y
			alpha = np.random.rand()
			beta = np.random.rand()
			pyresult = alpha * d_py * x_py + beta * y_py
			self.assertCall( lib.blas_diagmv(hdl, alpha, d, x, beta, y) )
			self.assertCall( lib.vector_memcpy_av(y_ptr, y, 1) )
			self.assertVecEqual( y_py, pyresult, TOL * m**0.5, TOL )

			self.free_vars('x', 'y', 'd', 'hdl')
			self.assertCall( lib.ok_device_reset() )

	def test_blas3_gemm(self):
		"""blas_gemm computes C = alpha * B'A + beta * C."""
		(m, n) = self.shape
		x_rand = np.random.rand(n)

		B_test = np.random.rand(m, n)
		C_test = np.random.rand(n, n)

		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			# GPU builds lose an extra digit of accuracy here
			DIGITS = 7 - 2 * single_precision - 1 * gpu
			RTOL = 10**(-DIGITS)
			ATOLMN = RTOL * (m * n)**0.5

			for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
				hdl = self.register_blas_handle(lib, 'hdl')

				# allocate A, B
				A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')
				B, B_py, B_ptr = self.register_matrix(lib, m, n, order, 'B')
				C, C_py, C_ptr = self.register_matrix(lib, n, n, order, 'C')

				# populate
				A_py += self.A_test
				B_py += B_test
				C_py += C_test
				self.assertCall( lib.matrix_memcpy_ma(A, A_ptr, order) )
				self.assertCall( lib.matrix_memcpy_ma(B, B_ptr, order) )
				self.assertCall( lib.matrix_memcpy_ma(C, C_ptr, order) )

				# perform C = alpha * B'A + beta * C
				alpha = np.random.rand()
				beta = np.random.rand()
				pyresult = alpha * B_py.T.dot(A_py) + beta * C_py
				self.assertCall( lib.blas_gemm(hdl, lib.enums.CblasTrans,
							  	 lib.enums.CblasNoTrans, alpha, B, A, beta,
							  	 C) )
				self.assertCall( lib.matrix_memcpy_am(C_ptr, C, order) )
				self.assertVecEqual( C_py, pyresult, ATOLMN, RTOL )

				self.free_vars('A', 'B', 'C', 'hdl')
				self.assertCall( lib.ok_device_reset() )

	def test_blas3_syrk(self):
		"""blas_syrk: symmetric rank-k update B = alpha * A'A + beta * B
		(only the lower triangle is compared)."""
		(m, n) = self.shape
		B_test = np.random.rand(n, n)

		# make B symmetric
		B_test = B_test.T.dot(B_test)

		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			DIGITS = 7 - 2 * single_precision
			TOL = 10**(-DIGITS)

			for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
				hdl = self.register_blas_handle(lib, 'hdl')

				# allocate A, B
				A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')
				B, B_py, B_ptr = self.register_matrix(lib, n, n, order, 'B')

				# populate
				A_py += self.A_test[:, :]
				B_py += B_test
				self.assertCall( lib.matrix_memcpy_ma(A, A_ptr, order) )
				self.assertCall( lib.matrix_memcpy_ma(B, B_ptr, order) )

				# B = alpha * (A'A) + beta * B
				alpha = np.random.rand()
				beta = np.random.rand()
				pyresult = alpha * A_py.T.dot(A_py) + beta * B_py
				self.assertCall( lib.blas_syrk(hdl, lib.enums.CblasLower,
							  	 lib.enums.CblasTrans, alpha, A, beta, B) )
				self.assertCall( lib.matrix_memcpy_am(B_ptr, B, order) )
				# zero out the (untouched) upper triangles before comparing
				for i in xrange(n):
					for j in xrange(n):
						if j > i:
							pyresult[i, j] *= 0
							B_py[i, j] *= 0
				self.assertVecEqual( B_py, pyresult, TOL * n, TOL )

				self.free_vars('A', 'B', 'hdl')
				self.assertCall( lib.ok_device_reset() )

	def test_blas3_trsm(self):
		"""blas_trsm: triangular solve A <- A * inv(L) (CPU builds only)."""
		(m, n) = self.shape

		# make square, invertible L
		L_test = np.random.rand(n, n)

		for i in xrange(n):
			L_test[i, i] /= 10**np.log(n)
			L_test[i, i] += 1
			for j in xrange(n):
				if j > i:
					L_test[i, j]*= 0

		for (gpu, single_precision) in self.CONDITIONS:
			# skipped on GPU builds
			if gpu:
				continue

			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			DIGITS = 7 - 2 * single_precision
			RTOL = 10**(-DIGITS)
			ATOLMN = RTOL * (m * n)**0.5

			for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
				hdl = self.register_blas_handle(lib, 'hdl')

				# allocate A, L
				A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')
				L, L_py, L_ptr = self.register_matrix(lib, n, n, order, 'L')

				# populate
				A_py += self.A_test
				L_py += L_test
				self.assertCall( lib.matrix_memcpy_ma(A, A_ptr, order) )
				self.assertCall( lib.matrix_memcpy_ma(L, L_ptr, order) )

				# A = A * inv(L)
				pyresult = A_py.dot(np.linalg.inv(L_test))
				self.assertCall( lib.blas_trsm(hdl, lib.enums.CblasRight,
								 lib.enums.CblasLower, lib.enums.CblasNoTrans,
								 lib.enums.CblasNonUnit, 1., L, A) )
				self.assertCall( lib.matrix_memcpy_am(A_ptr, A, order) )
				self.assertVecEqual( A_py, pyresult, ATOLMN, RTOL )

				self.free_vars('A', 'L', 'hdl')
				self.assertCall( lib.ok_device_reset() )
Example #4
0
class DenseLinalgTestCase(OptkitCTestCase):
	"""Tests for dense linear-algebra routines beyond plain BLAS:
	Cholesky factor/solve, row/column squares, vector broadcast
	transforms, and row/column min/max reductions.
	NOTE(review): uses ``xrange``, so this module targets Python 2.
	"""

	@classmethod
	def setUpClass(self):
		# NOTE(review): classmethod arg named 'self'; conventionally 'cls'.
		# Force locally built libraries; restored in tearDownClass.
		self.env_orig = os.getenv('OPTKIT_USE_LOCALLIBS', '0')
		os.environ['OPTKIT_USE_LOCALLIBS'] = '1'
		self.libs = DenseLinsysLibs()
		# A_test_gen: fixture matrix from the base class.
		self.A_test = self.A_test_gen

	@classmethod
	def tearDownClass(self):
		os.environ['OPTKIT_USE_LOCALLIBS'] = self.env_orig

	def setUp(self):
		# No per-test setup; all fixtures are class-level.
		pass

	def tearDown(self):
		# Release registered C allocations, then run deferred exit calls.
		self.free_all_vars()
		self.exit_call()

	def test_cholesky(self):
		"""Cholesky decomposition and triangular solve match NumPy."""
		(m, n) = self.shape

		mindim = min(m, n)

		# build decently conditioned symmetric matrix
		AA_test = self.A_test.T.dot(self.A_test)[:mindim, :mindim]
		AA_test /= np.linalg.norm(AA_test) * mindim**0.5
		for i in xrange(mindim):
			# diagonal entries ~ 1 to keep condition number reasonable
			AA_test[i, i] /= 10**np.log(mindim)
			AA_test[i, i] += 1
			# upper triangle = 0
			for j in xrange(mindim):
				if j > i:
					AA_test[i, j] *= 0
		AA_test += AA_test.T

		x_rand = np.random.rand(mindim)
		pysol = np.linalg.solve(AA_test, x_rand)
		pychol = np.linalg.cholesky(AA_test)

		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
				hdl = self.register_blas_handle(lib, 'hdl')

				# allocate L, x
				L, L_py, L_ptr = self.register_matrix(
					lib, mindim, mindim, order, 'L')
				x, x_py, x_ptr = self.register_vector(lib, mindim, 'x')

				# populate L
				L_py *= 0
				L_py += AA_test
				self.assertCall( lib.matrix_memcpy_ma(L, L_ptr, order) )

				# cholesky factorization
				self.assertCall( lib.linalg_cholesky_decomp(hdl, L) )
				self.assertCall( lib.matrix_memcpy_am(L_ptr, L, order) )
				# keep only the lower-triangular factor for comparison
				for i in xrange(mindim):
					for j in xrange(mindim):
						if j > i:
							L_py[i, j] *= 0

				# looser tolerances for GPU and/or single precision builds
				imprecision_factor = 5**(int(gpu) + int(single_precision))
				atol = 1e-2 * imprecision_factor * mindim
				rtol = 1e-2 * imprecision_factor
				self.assertVecEqual( L_py.dot(x_rand), pychol.dot(x_rand),
									 atol, rtol )

				# populate x
				x_py *= 0
				x_py += x_rand
				self.assertCall( lib.vector_memcpy_va(x, x_ptr, 1) )

				# cholesky solve
				self.assertCall( lib.linalg_cholesky_svx(hdl, L, x) )
				self.assertCall( lib.vector_memcpy_av(x_ptr, x, 1) )
				self.assertVecEqual( x_py, pysol, atol * mindim**0.5, rtol )

				self.free_vars('L', 'x', 'hdl')
				self.assertCall( lib.ok_device_reset() )

	def test_row_squares(self):
		"""Row/column squared norms match per-row/-column NumPy dots."""
		m, n = self.shape

		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			DIGITS = 7 - 5 * lib.FLOAT - 1 * lib.GPU
			RTOL = 10**(-DIGITS)
			ATOLM = RTOL * m**0.5
			ATOLN = RTOL * n**0.5

			for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
				# allocate A, r, c
				A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')
				c, c_py, c_ptr = self.register_vector(lib, n, 'c')
				r, r_py, r_ptr = self.register_vector(lib, m, 'r')

				A_py += self.A_test
				self.assertCall( lib.matrix_memcpy_ma(A, A_ptr, order) )

				py_rows = [A_py[i, :].dot(A_py[i, :]) for i in xrange(m)]
				py_cols = [A_py[:, j].dot(A_py[:, j]) for j in xrange(n)]

				# C: calculate row squares
				self.assertCall( lib.linalg_matrix_row_squares(
						lib.enums.CblasNoTrans, A, r) )
				self.assertCall( lib.vector_memcpy_av(r_ptr, r, 1) )

				# compare C vs Python results
				self.assertVecEqual( r_py, py_rows, ATOLM, RTOL )

				# C: calculate column squares
				self.assertCall( lib.linalg_matrix_row_squares(
						lib.enums.CblasTrans, A, c) )
				self.assertCall( lib.vector_memcpy_av(c_ptr, c, 1) )

				# compare C vs Python results
				self.assertVecEqual( c_py, py_cols, ATOLN, RTOL )

				# free memory
				self.free_vars('A', 'r', 'c')
				self.assertCall( lib.ok_device_reset() )

	def test_broadcast(self):
		"""Broadcast transforms (scale / add by row or column vector)
		are verified indirectly via gemv against NumPy equivalents."""
		(m, n) = self.shape

		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			DIGITS = 7 - 5 * lib.FLOAT - 1 * lib.GPU
			RTOL = 10**(-DIGITS)
			ATOLM = RTOL * m**0.5

			for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
				hdl = self.register_blas_handle(lib, 'hdl')

				# allocate A, d, e, x, y
				A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')
				d, d_py, d_ptr = self.register_vector(lib, m, 'd')
				e, e_py, e_ptr = self.register_vector(lib, n, 'e')
				x, x_py, x_ptr = self.register_vector(lib, n, 'x')
				y, y_py, y_ptr = self.register_vector(lib, m, 'y')

				A_py += self.A_test
				d_py += np.random.rand(m)
				e_py += np.random.rand(n)
				x_py += np.random.rand(n)
				self.assertCall( lib.matrix_memcpy_ma(A, A_ptr, order) )
				self.assertCall( lib.vector_memcpy_va(d, d_ptr, 1) )
				self.assertCall( lib.vector_memcpy_va(e, e_ptr, 1) )
				self.assertCall( lib.vector_memcpy_va(x, x_ptr, 1) )

				# A = A * diag(E)
				self.assertCall( lib.linalg_matrix_broadcast_vector(A, e,
										lib.enums.OkTransformScale,
										lib.enums.CblasRight) )

				self.assertCall( lib.blas_gemv(hdl, lib.enums.CblasNoTrans, 1,
										A, x, 0, y) )
				self.assertCall( lib.vector_memcpy_av(y_ptr, y, 1) )
				Ax = y_py
				AEx = A_py.dot(e_py * x_py)

				self.assertVecEqual( Ax, AEx, ATOLM, RTOL )

				# A = diag(D) * A
				self.assertCall( lib.linalg_matrix_broadcast_vector(A, d,
										lib.enums.OkTransformScale,
										lib.enums.CblasLeft) )
				self.assertCall( lib.blas_gemv(hdl, lib.enums.CblasNoTrans, 1,
										A, x, 0, y) )
				self.assertCall( lib.vector_memcpy_av(y_ptr, y, 1) )
				Ax = y_py
				DAEx = d_py * AEx
				self.assertVecEqual( Ax, DAEx, ATOLM, RTOL )

				# A += 1e'
				self.assertCall( lib.linalg_matrix_broadcast_vector(A, e,
										lib.enums.OkTransformAdd,
										lib.enums.CblasRight) )
				self.assertCall( lib.blas_gemv(hdl, lib.enums.CblasNoTrans, 1,
										A, x, 0, y) )
				self.assertCall( lib.vector_memcpy_av(y_ptr, y, 1) )
				Ax = y_py
				A_updatex = DAEx + np.ones(m) * e_py.dot(x_py)
				self.assertVecEqual( Ax, A_updatex, ATOLM, RTOL )

				# A += d1'
				self.assertCall( lib.linalg_matrix_broadcast_vector(A, d,
										lib.enums.OkTransformAdd,
										lib.enums.CblasLeft) )
				self.assertCall( lib.blas_gemv(hdl, lib.enums.CblasNoTrans, 1,
										A, x, 0, y) )
				self.assertCall( lib.vector_memcpy_av(y_ptr, y, 1) )
				Ax = y_py
				A_updatex += d_py * sum(x_py)
				self.assertVecEqual( Ax, A_updatex, ATOLM, RTOL )

				# free memory
				self.free_vars('A', 'd', 'e', 'x', 'y', 'hdl')
				self.assertCall( lib.ok_device_reset() )

	def test_reduce(self):
		"""Row/column min, max, and argmin reductions match NumPy."""
		(m, n) = self.shape

		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			DIGITS = 7 - 5 * lib.FLOAT - 1 * lib.GPU
			RTOL = 10**(-DIGITS)
			ATOLM = RTOL * m**0.5
			ATOLN = RTOL * n**0.5

			for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
				hdl = self.register_blas_handle(lib, 'hdl')

				# allocate A, d, e, x, y
				A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')
				d, d_py, d_ptr = self.register_vector(lib, m, 'd')
				e, e_py, e_ptr = self.register_vector(lib, n, 'e')
				x, x_py, x_ptr = self.register_vector(lib, n, 'x')
				y, y_py, y_ptr = self.register_vector(lib, m, 'y')

				A_py += self.A_test
				self.assertCall( lib.matrix_memcpy_ma(A, A_ptr, order) )

				x_py += np.random.rand(n)
				self.assertCall( lib.vector_memcpy_va(x, x_ptr, 1) )

				# min - reduce columns
				colmin = np.min(A_py, 0)
				self.assertCall( lib.linalg_matrix_reduce_min(e, A,
										lib.enums.CblasLeft) )
				self.assertCall( lib.vector_memcpy_av(e_ptr, e, 1) )
				self.assertVecEqual( e_py, colmin, ATOLN, RTOL )

				# min - reduce rows
				rowmin = np.min(A_py, 1)
				self.assertCall( lib.linalg_matrix_reduce_min(d, A,
										lib.enums.CblasRight) )
				self.assertCall( lib.vector_memcpy_av(d_ptr, d, 1) )
				self.assertVecEqual( d_py, rowmin, ATOLM, RTOL )

				# max - reduce columns
				colmax = np.max(A_py, 0)
				self.assertCall( lib.linalg_matrix_reduce_max(e, A,
										lib.enums.CblasLeft) )
				self.assertCall( lib.vector_memcpy_av(e_ptr, e, 1) )
				self.assertVecEqual( e_py, colmax, ATOLN, RTOL )

				# max - reduce rows
				rowmax = np.max(A_py, 1)
				self.assertCall( lib.linalg_matrix_reduce_max(d, A,
										lib.enums.CblasRight) )
				self.assertCall( lib.vector_memcpy_av(d_ptr, d, 1) )
				self.assertVecEqual( d_py, rowmax, ATOLM, RTOL )

				# indmin - reduce columns
				idx, inds, inds_ptr = self.register_indvector(lib, n, 'idx')
				self.assertCall( lib.linalg_matrix_reduce_indmin(idx, e, A,
										lib.enums.CblasLeft) )
				self.assertCall( lib.indvector_memcpy_av(inds_ptr, idx, 1) )
				self.free_var('idx')
				# gather the minima indicated by the returned indices
				calcmin = np.array([A_py[inds[i], i] for i in xrange(n)])
				colmin = np.min(A_py, 0)
				self.assertVecEqual( calcmin, colmin, ATOLN, RTOL )

				# indmin - reduce rows
				idx, inds, inds_ptr = self.register_indvector(lib, m, 'idx')
				self.assertCall( lib.linalg_matrix_reduce_indmin(idx, d, A,
										lib.enums.CblasRight) )
				self.assertCall( lib.indvector_memcpy_av(inds_ptr, idx, 1) )
				self.free_var('idx')
				calcmin = np.array([A_py[i, inds[i]] for i in xrange(m)])
				rowmin = np.min(A_py, 1)
				self.assertVecEqual( calcmin, rowmin, ATOLM, RTOL )

				# free memory
				self.free_vars('A', 'd', 'e', 'x', 'y', 'hdl')
				self.assertCall(lib.ok_device_reset() )
Example #5
0
class MatrixTestCase(OptkitCTestCase):
    """C-level tests of the dense matrix API.

    Covers allocation, host<->device transfer, views/slicing, and
    elementwise math. Every test repeats for each (gpu, precision) build
    condition and for both row- and column-major storage orders; build
    conditions whose library is unavailable are skipped.
    """

    @classmethod
    def setUpClass(cls):
        # Force use of the locally built libraries while this suite runs;
        # the caller's setting is restored in tearDownClass.
        cls.env_orig = os.getenv('OPTKIT_USE_LOCALLIBS', '0')
        os.environ['OPTKIT_USE_LOCALLIBS'] = '1'
        cls.libs = DenseLinsysLibs()
        cls.A_test = cls.A_test_gen

    @classmethod
    def tearDownClass(cls):
        os.environ['OPTKIT_USE_LOCALLIBS'] = cls.env_orig

    def setUp(self):
        pass

    def tearDown(self):
        # Free any C-side allocations registered during the test, then run
        # deferred exit handlers (device resets).
        self.free_all_vars()
        self.exit_call()

    def test_alloc(self):
        """matrix_calloc sets dims/ld/order and zero-fills; free resets."""
        (m, n) = self.shape

        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
                A = lib.matrix(0, 0, 0, None, order)
                self.assertEqual(A.size1, 0)
                self.assertEqual(A.size2, 0)
                self.assertEqual(A.ld, 0)
                self.assertEqual(A.order, order)

                # calloc
                self.assertCall(lib.matrix_calloc(A, m, n, order))
                self.register_var('A', A, lib.matrix_free)
                self.assertEqual(A.size1, m)
                self.assertEqual(A.size2, n)
                # leading dimension follows the storage order
                if order == lib.enums.CblasRowMajor:
                    self.assertEqual(A.ld, n)
                else:
                    self.assertEqual(A.ld, m)
                self.assertEqual(A.order, order)
                if not gpu:
                    # raw data pointer is only host-readable on CPU builds
                    for i in range(m * n):
                        self.assertEqual(A.data[i], 0)
                # free
                self.free_var('A')
                self.assertCall(lib.ok_device_reset())

    def test_io(self):
        """Host<->device copies: memcpy_am/ma/mm, view_array, set_all."""
        (m, n) = self.shape
        # Private copy: this test mutates the array in place (set_all
        # section), and self.A_test is shared by the other tests.
        A_rand = self.A_test.copy()

        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 11 - 4 * lib.FLOAT - 1 * lib.GPU
            TOL = 10**(-DIGITS)

            for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
                A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')

                # memcpy_am:
                # set A_py to A_rand, then overwrite A_py with zeros from A
                A_py += A_rand
                self.assertCall(lib.matrix_memcpy_am(A_ptr, A, order))
                for i in range(m):
                    for j in range(n):
                        self.assertEqual(A_py[i, j], 0)

                # memcpy_ma: A <- A_py, then read back and compare
                A_py += A_rand
                self.assertCall(lib.matrix_memcpy_ma(A, A_ptr, order))
                A_py *= 0
                self.assertCall(lib.matrix_memcpy_am(A_ptr, A, order))
                self.assertVecEqual(A_py, A_rand, TOL, TOL)

                # memcpy_mm: matrix-to-matrix copy
                Z, Z_py, Z_ptr = self.register_matrix(lib, m, n, order, 'Z')
                self.assertCall(lib.matrix_memcpy_mm(Z, A, order))
                self.assertCall(lib.matrix_memcpy_am(Z_ptr, Z, order))
                self.assertVecEqual(Z_py, A_py, TOL, TOL)

                # view_array (CPU only: the view wraps host memory directly)
                if not gpu:
                    A_py *= 0
                    B = lib.matrix(0, 0, 0, None, order)
                    self.assertCall(
                        lib.matrix_view_array(
                            B, A_rand.ctypes.data_as(lib.ok_float_p), m, n,
                            order))
                    # NOTE(review): B is created but never read back, so only
                    # the view call's status is checked here; reading through
                    # B would require A_rand in lib.pyfloat dtype — confirm
                    # before tightening this test.
                    self.assertCall(lib.matrix_memcpy_am(A_ptr, A, order))
                    self.assertVecEqual(A_py, A_rand, TOL, TOL)

                # set_all
                val = 2
                A_rand *= 0
                A_rand += val
                self.assertCall(lib.matrix_set_all(A, val))
                self.assertCall(lib.matrix_memcpy_am(A_ptr, A, order))
                self.assertVecEqual(A_py, A_rand, TOL, TOL)

                self.free_vars('A', 'Z')
                self.assertCall(lib.ok_device_reset())

    def test_slicing(self):
        """Matrix slicing: submatrix, row, column, and diagonal views."""
        (m, n) = self.shape
        A_rand = self.A_test

        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 7 - 2 * lib.FLOAT - 1 * lib.GPU
            TOL = 10**(-DIGITS)

            for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
                pyorder = 'C' if order == lib.enums.CblasRowMajor else 'F'

                A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')

                # set A, A_py to A_rand
                A_py += A_rand
                self.assertCall(lib.matrix_memcpy_ma(A, A_ptr, order))

                # submatrix: view of the central (m/2) x (n/2) block.
                # floor division keeps the C calls receiving ints under
                # Python 3 (true division would pass floats).
                m0 = m // 4
                n0 = n // 4
                msub = m // 2
                nsub = n // 2
                Asub = lib.matrix(0, 0, 0, None, order)
                Asub_py = np.zeros((msub, nsub),
                                   order=pyorder).astype(lib.pyfloat)
                Asub_ptr = Asub_py.ctypes.data_as(lib.ok_float_p)

                self.assertCall(
                    lib.matrix_submatrix(Asub, A, m0, n0, msub, nsub))
                self.assertCall(lib.matrix_memcpy_am(Asub_ptr, Asub, order))
                A_py_sub = A_py[m0:m0 + msub, n0:n0 + nsub]
                self.assertVecEqual(Asub_py, A_py_sub, TOL, TOL)

                # row
                v = lib.vector(0, 0, None)
                v_py = np.zeros(n).astype(lib.pyfloat)
                v_ptr = v_py.ctypes.data_as(lib.ok_float_p)
                self.assertCall(lib.matrix_row(v, A, m0))
                self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
                self.assertVecEqual(A_py[m0, :], v_py, TOL, TOL)

                # column
                v_py = np.zeros(m).astype(lib.pyfloat)
                v_ptr = v_py.ctypes.data_as(lib.ok_float_p)
                self.assertCall(lib.matrix_column(v, A, n0))
                self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
                self.assertVecEqual(A_py[:, n0], v_py, TOL, TOL)

                # diagonal
                v_py = np.zeros(min(m, n)).astype(lib.pyfloat)
                v_ptr = v_py.ctypes.data_as(lib.ok_float_p)
                self.assertCall(lib.matrix_diagonal(v, A))
                self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
                self.assertVecEqual(np.diag(A_py), v_py, TOL, TOL)

                self.free_var('A')
                self.assertCall(lib.ok_device_reset())

    def test_math(self):
        """Elementwise matrix math: scale, scale_left/right, abs, pow.

        The expected result is maintained in A_rand by mirroring each C
        call with the equivalent numpy operation.
        """
        (m, n) = self.shape
        # Private copy: the expected values are accumulated in-place here,
        # and self.A_test is shared by the other tests.
        A_rand = self.A_test.copy()

        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            DIGITS = 7 - 2 * lib.FLOAT - 1 * lib.GPU
            RTOL = 10**(-DIGITS)
            ATOLMN = RTOL * (m * n)**0.5

            for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
                A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')

                # set A, A_py to A_rand
                A_py += A_rand
                self.assertCall(lib.matrix_memcpy_ma(A, A_ptr, order))

                # scale: A = alpha * A
                alpha = np.random.rand()
                A_rand *= alpha
                self.assertCall(lib.matrix_scale(A, alpha))
                self.assertCall(lib.matrix_memcpy_am(A_ptr, A, order))
                self.assertVecEqual(A_py, A_rand, ATOLMN, RTOL)

                # scale_left: A = diag(d) * A
                d, d_py, d_ptr = self.register_vector(lib, m, 'd')
                d_py[:] = np.random.rand(m)
                for i in range(m):
                    A_rand[i, :] *= d_py[i]
                self.assertCall(lib.vector_memcpy_va(d, d_ptr, 1))
                self.assertCall(lib.matrix_scale_left(A, d))
                self.assertCall(lib.matrix_memcpy_am(A_ptr, A, order))
                self.assertVecEqual(A_py, A_rand, ATOLMN, RTOL)

                # scale_right: A = A * diag(e)
                e, e_py, e_ptr = self.register_vector(lib, n, 'e')
                e_py[:] = np.random.rand(n)
                for j in range(n):
                    A_rand[:, j] *= e_py[j]
                self.assertCall(lib.vector_memcpy_va(e, e_ptr, 1))
                self.assertCall(lib.matrix_scale_right(A, e))
                self.assertCall(lib.matrix_memcpy_am(A_ptr, A, order))
                self.assertVecEqual(A_py, A_rand, ATOLMN, RTOL)

                # abs: A_ij = abs(A_ij)
                # shift so roughly half the entries are negative first
                A_rand -= (A_rand.max() - A_rand.min()) / 2
                A_py *= 0
                A_py += A_rand
                A_rand = np.abs(A_rand)
                self.assertCall(lib.matrix_memcpy_ma(A, A_ptr, order))
                self.assertCall(lib.matrix_abs(A))
                self.assertCall(lib.matrix_memcpy_am(A_ptr, A, order))
                self.assertVecEqual(A_py, A_rand, ATOLMN, RTOL)

                # pow (entries are non-negative after abs, so a fractional
                # exponent is safe)
                p = 3 * np.random.rand()
                A_rand **= p
                self.assertCall(lib.matrix_pow(A, p))
                self.assertCall(lib.matrix_memcpy_am(A_ptr, A, order))
                self.assertVecEqual(A_py, A_rand, ATOLMN, RTOL)

                self.free_vars('d', 'e', 'A')
                self.assertCall(lib.ok_device_reset())
Beispiel #6
0
class VectorTestCase(OptkitCTestCase):
    """C-level tests of the dense vector API (alloc, I/O, math).

    Every test repeats for each (gpu, precision) build condition; build
    conditions whose library is unavailable are skipped.
    """

    @classmethod
    def setUpClass(self):
        # force use of the locally built libraries while this suite runs;
        # the caller's setting is restored in tearDownClass
        self.env_orig = os.getenv('OPTKIT_USE_LOCALLIBS', '0')
        os.environ['OPTKIT_USE_LOCALLIBS'] = '1'
        self.libs = DenseLinsysLibs()

    @classmethod
    def tearDownClass(self):
        os.environ['OPTKIT_USE_LOCALLIBS'] = self.env_orig

    def tearDown(self):
        # free registered C allocations, then run deferred exit handlers
        self.free_all_vars()
        self.exit_call()

    def test_alloc(self):
        """vector_calloc sets size/stride and zero-fills; free resets."""
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            # random length in [10, 20)
            len_v = 10 + int(10 * np.random.rand())

            v = lib.vector(0, 0, None)
            self.assertEqual(v.size, 0)
            self.assertEqual(v.stride, 0)

            self.assertCall(lib.vector_calloc(v, len_v))
            self.register_var('v', v, lib.vector_free)

            self.assertEqual(v.size, len_v)
            self.assertEqual(v.stride, 1)

            if not gpu:
                # raw data pointer is only host-readable on CPU builds
                for i in range(v.size):
                    self.assertEqual(v.data[i], 0)

            self.free_var('v')
            self.assertEqual(v.size, 0)
            self.assertEqual(v.stride, 0)

    def test_io(self):
        """Host<->device copies: set_all, memcpy_av/va/vv, view_array."""
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            len_v = 10 + int(1000 * np.random.rand())
            DIGITS = 7 - 2 * single_precision
            RTOL = 10**(-DIGITS)
            ATOL = RTOL * len_v**0.5

            v, v_py, v_ptr = self.register_vector(lib, len_v, 'v')
            w, w_py, w_ptr = self.register_vector(lib, len_v, 'w')

            # set_all
            set_val = 5
            self.assertCall(lib.vector_set_all(v, set_val))

            # memcpy_av: device -> host
            self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
            for i in range(len_v):
                self.assertEqual(v_py[i], set_val)

            # memcpy_va: host -> device, then round-trip back and compare
            w_rand = np.random.rand(len_v)
            w_py[:] = w_rand[:]
            self.assertCall(lib.vector_memcpy_va(w, w_ptr, 1))
            w_py *= 0
            self.assertCall(lib.vector_memcpy_av(w_ptr, w, 1))
            self.assertVecEqual(w_py, w_rand, ATOL, RTOL)

            # memcpy_vv: vector -> vector
            self.assertCall(lib.vector_memcpy_vv(v, w))
            self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
            self.assertVecEqual(v_py, w_rand, ATOL, RTOL)

            # view_array (CPU only: the view wraps host memory directly)
            if not gpu:
                u_rand = np.random.rand(len_v).astype(lib.pyfloat)
                u_ptr = u_rand.ctypes.data_as(lib.ok_float_p)
                u = lib.vector(0, 0, None)
                self.assertCall(lib.vector_view_array(u, u_ptr, u_rand.size))
                self.assertCall(lib.vector_memcpy_av(v_ptr, u, 1))
                self.assertVecEqual(v_py, u_rand, ATOL, RTOL)

                # DON'T FREE u, DATA OWNED BY PYTHON

            self.free_vars('v', 'w')
            self.assertCall(lib.ok_device_reset())

    def test_subvector(self):
        """vector_subvector views a contiguous slice of the parent."""
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            len_v = 10 + int(10 * np.random.rand())

            v, v_py, v_ptr = self.register_vector(lib, len_v, 'v')

            offset_sub = 3
            len_sub = 3
            v_sub = lib.vector(0, 0, None)
            self.assertCall(lib.vector_subvector(v_sub, v, offset_sub,
                                                 len_sub))
            self.assertEqual(v_sub.size, 3)
            self.assertEqual(v_sub.stride, v.stride)
            v_sub_py = np.zeros(len_sub).astype(lib.pyfloat)
            v_sub_ptr = v_sub_py.ctypes.data_as(lib.ok_float_p)
            self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
            self.assertCall(lib.vector_memcpy_av(v_sub_ptr, v_sub, 1))
            # view must alias the parent's elements at the given offset
            for i in range(len_sub):
                self.assertEqual(v_sub_py[i], v_py[i + offset_sub])

            self.free_var('v')
            self.assertCall(lib.ok_device_reset())

    def test_math(self):
        """Elementwise vector math.

        v and w start as zero vectors and only constant-valued updates are
        applied, so every element stays equal; each C call is mirrored by
        the same update on the scalars val1 (tracking v) and val2
        (tracking w), and results are checked against those scalars.
        """
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            val1 = 12 * np.random.rand()
            val2 = 5 * np.random.rand()
            len_v = 10 + int(1000 * np.random.rand())

            DIGITS = 7 - 2 * lib.FLOAT - 1 * lib.GPU
            RTOL = 10**(-DIGITS)
            ATOL = RTOL * len_v**0.5

            v, v_py, v_ptr = self.register_vector(lib, len_v, 'v')
            w, w_py, w_ptr = self.register_vector(lib, len_v, 'w')

            # constant addition
            self.assertCall(lib.vector_add_constant(v, val1))
            self.assertCall(lib.vector_add_constant(w, val2))
            self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
            self.assertCall(lib.vector_memcpy_av(w_ptr, w, 1))
            self.assertVecEqual(v_py, val1, ATOL, RTOL)
            self.assertVecEqual(w_py, val2, ATOL, RTOL)

            # add two vectors
            self.assertCall(lib.vector_add(v, w))
            val1 += val2
            self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
            self.assertCall(lib.vector_memcpy_av(w_ptr, w, 1))
            self.assertVecEqual(v_py, val1, ATOL, RTOL)
            self.assertVecEqual(w_py, val2, ATOL, RTOL)

            # subtract two vectors
            self.assertCall(lib.vector_sub(w, v))
            val2 -= val1
            self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
            self.assertCall(lib.vector_memcpy_av(w_ptr, w, 1))
            self.assertVecEqual(v_py, val1, ATOL, RTOL)
            self.assertVecEqual(w_py, val2, ATOL, RTOL)

            # multiply two vectors
            self.assertCall(lib.vector_mul(v, w))
            val1 *= val2
            self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
            self.assertCall(lib.vector_memcpy_av(w_ptr, w, 1))
            self.assertVecEqual(v_py, val1, ATOL, RTOL)
            self.assertVecEqual(w_py, val2, ATOL, RTOL)

            # vector scale
            scal = 3 * np.random.rand()
            val1 *= scal
            self.assertCall(lib.vector_scale(v, scal))
            self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
            self.assertVecEqual(v_py, val1, ATOL, RTOL)

            # make sure v is strictly positive (safe divisor below)
            val1 = 0.7 + np.random.rand()
            self.assertCall(lib.vector_scale(v, 0))
            self.assertCall(lib.vector_add_constant(v, val1))

            # divide two vectors
            self.assertCall(lib.vector_div(w, v))
            val2 /= float(val1)
            self.assertCall(lib.vector_memcpy_av(v_ptr, v, 1))
            self.assertCall(lib.vector_memcpy_av(w_ptr, w, 1))
            self.assertVecEqual(v_py, val1, ATOL, RTOL)
            self.assertVecEqual(w_py, val2, ATOL, RTOL)

            # make w strictly negative (so abs below is a real test)
            w_max = w_py.max()
            val2 -= (w_max + 1)
            self.assertCall(lib.vector_add_constant(w, -(w_max + 1)))

            # vector abs
            self.assertCall(lib.vector_abs(w))
            val2 = abs(val2)
            self.assertCall(lib.vector_memcpy_av(w_ptr, w, 1))
            self.assertVecEqual(w_py, val2, ATOL, RTOL)

            # vector recip
            self.assertCall(lib.vector_recip(w))
            val2 = 1. / val2
            self.assertCall(lib.vector_memcpy_av(w_ptr, w, 1))
            self.assertVecEqual(w_py, val2, ATOL, RTOL)

            # vector sqrt
            self.assertCall(lib.vector_sqrt(w))
            val2 **= 0.5
            self.assertCall(lib.vector_memcpy_av(w_ptr, w, 1))
            self.assertVecEqual(w_py, val2, ATOL, RTOL)

            # vector pow (w is positive after abs/recip/sqrt, so a
            # negative exponent is safe)
            pow_val = -2 + 4 * np.random.rand()
            self.assertCall(lib.vector_pow(w, pow_val))
            val2 **= pow_val
            self.assertCall(lib.vector_memcpy_av(w_ptr, w, 1))
            self.assertVecEqual(w_py, val2, ATOL, RTOL)

            # vector exp
            self.assertCall(lib.vector_exp(w))
            val2 = np.exp(val2)
            self.assertCall(lib.vector_memcpy_av(w_ptr, w, 1))
            self.assertVecEqual(val2, w_py, ATOL, RTOL)

            # min / max: refill w with distinct random entries
            w_py *= 0
            w_py += np.random.rand(len(w_py))
            self.assertCall(lib.vector_memcpy_va(w, w_ptr, 1))

            # vector argmin
            wargmin = np.zeros(1).astype(c_size_t)
            wargmin_p = wargmin.ctypes.data_as(lib.c_size_t_p)
            self.assertCall(lib.vector_indmin(w, wargmin_p))
            self.assertScalarEqual(w_py[wargmin[0]], w_py.min(), RTOL)

            # vector min
            wmin = np.zeros(1).astype(lib.pyfloat)
            wmin_p = wmin.ctypes.data_as(lib.ok_float_p)
            self.assertCall(lib.vector_min(w, wmin_p))
            self.assertScalarEqual(wmin[0], w_py.min(), RTOL)

            # vector max (reuses the min output buffer)
            wmax = wmin
            wmax_p = wmin_p
            self.assertCall(lib.vector_max(w, wmax_p))
            self.assertScalarEqual(wmax[0], w_py.max(), RTOL)

            self.free_vars('v', 'w')
            self.assertCall(lib.ok_device_reset())

    def test_indvector_math(self):
        """indvector (index vector) min, max, and argmin."""
        for (gpu, single_precision) in self.CONDITIONS:
            lib = self.libs.get(single_precision=single_precision, gpu=gpu)
            if lib is None:
                continue
            self.register_exit(lib.ok_device_reset)

            # (val1/val2 unused here; kept from the float-vector variant)
            val1 = 12 * np.random.rand()
            val2 = 5 * np.random.rand()
            len_v = 10 + int(1000 * np.random.rand())

            DIGITS = 7 - 2 * lib.FLOAT - 1 * lib.GPU
            RTOL = 10**(-DIGITS)

            w, w_py, w_ptr = self.register_indvector(lib, len_v, 'w')

            # fill w with random integer-valued entries in [0, 30)
            w_py *= 0
            w_py += (30 * np.random.rand(len(w_py))).astype(w_py.dtype)
            self.assertCall(lib.indvector_memcpy_va(w, w_ptr, 1))

            # vector argmin
            wargmin = np.zeros(1).astype(c_size_t)
            wargmin_p = wargmin.ctypes.data_as(lib.c_size_t_p)
            self.assertCall(lib.indvector_indmin(w, wargmin_p))
            self.assertScalarEqual(w_py[wargmin[0]], w_py.min(), RTOL)

            # vector min (reuses the argmin output buffer)
            wmin = wargmin
            wmin_p = wargmin_p
            self.assertCall(lib.indvector_min(w, wmin_p))
            self.assertScalarEqual(wmin[0], w_py.min(), RTOL)

            # vector max
            wmax = wmin
            wmax_p = wmin_p
            self.assertCall(lib.indvector_max(w, wmax_p))
            self.assertScalarEqual(wmax[0], w_py.max(), RTOL)

            self.free_var('w')
            self.assertCall(lib.ok_device_reset())
Beispiel #7
0
class MatrixTestCase(OptkitCTestCase):
	"""C-level tests of the dense matrix API.

	Covers allocation, host<->device transfer, views/slicing, and
	elementwise math. Every test repeats for each (gpu, precision) build
	condition and for both row- and column-major storage orders.
	"""

	@classmethod
	def setUpClass(cls):
		# force locally built libraries for this suite; restored in
		# tearDownClass
		cls.env_orig = os.getenv('OPTKIT_USE_LOCALLIBS', '0')
		os.environ['OPTKIT_USE_LOCALLIBS'] = '1'
		cls.libs = DenseLinsysLibs()
		cls.A_test = cls.A_test_gen

	@classmethod
	def tearDownClass(cls):
		os.environ['OPTKIT_USE_LOCALLIBS'] = cls.env_orig

	def setUp(self):
		pass

	def tearDown(self):
		# free registered C allocations, then run deferred exit handlers
		self.free_all_vars()
		self.exit_call()

	def test_alloc(self):
		"""matrix_calloc sets dims/ld/order and zero-fills; free resets."""
		(m, n) = self.shape

		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
				A = lib.matrix(0, 0, 0, None, order)
				self.assertEqual( A.size1, 0 )
				self.assertEqual( A.size2, 0 )
				self.assertEqual( A.ld, 0 )
				self.assertEqual( A.order, order )

				# calloc
				self.assertCall( lib.matrix_calloc(A, m, n, order) )
				self.register_var('A', A, lib.matrix_free)
				self.assertEqual( A.size1, m )
				self.assertEqual( A.size2, n )
				# leading dimension follows the storage order
				if order == lib.enums.CblasRowMajor:
					self.assertEqual( A.ld, n )
				else:
					self.assertEqual( A.ld, m )
				self.assertEqual( A.order, order )
				if not gpu:
					# raw data pointer is only host-readable on CPU builds
					for i in range(m * n):
						self.assertEqual( A.data[i], 0 )
				# free
				self.free_var('A')
				self.assertCall( lib.ok_device_reset() )

	def test_io(self):
		"""Host<->device copies: memcpy_am/ma/mm, view_array, set_all."""
		(m, n) = self.shape
		# private copy: this test mutates the array in place (set_all
		# section), and self.A_test is shared by the other tests
		A_rand = self.A_test.copy()

		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			DIGITS = 11 - 4 * lib.FLOAT - 1 * lib.GPU
			TOL = 10**(-DIGITS)

			for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
				A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')

				# memcpy_am:
				# set A_py to A_rand, then overwrite A_py with zeros from A
				A_py += A_rand
				self.assertCall( lib.matrix_memcpy_am(A_ptr, A, order) )
				for i in range(m):
					for j in range(n):
						self.assertEqual( A_py[i, j], 0)

				# memcpy_ma: A <- A_py, then read back and compare
				A_py += A_rand
				self.assertCall( lib.matrix_memcpy_ma(A, A_ptr, order) )
				A_py *= 0
				self.assertCall( lib.matrix_memcpy_am(A_ptr, A, order) )
				self.assertVecEqual( A_py, A_rand, TOL, TOL )

				# memcpy_mm: matrix-to-matrix copy
				Z, Z_py, Z_ptr = self.register_matrix(lib, m, n, order, 'Z')
				self.assertCall( lib.matrix_memcpy_mm(Z, A, order) )
				self.assertCall( lib.matrix_memcpy_am(Z_ptr, Z, order) )
				self.assertVecEqual( Z_py, A_py, TOL, TOL )

				# view_array (CPU only: the view wraps host memory directly)
				if not gpu:
					A_py *= 0
					B = lib.matrix(0, 0, 0, None, order)
					self.assertCall( lib.matrix_view_array(B,
						A_rand.ctypes.data_as(lib.ok_float_p), m, n, order) )
					# NOTE(review): B is created but never read back, so only
					# the view call's status is checked here; reading through
					# B would require A_rand in lib.pyfloat dtype — confirm
					# before tightening this test.
					self.assertCall( lib.matrix_memcpy_am(A_ptr, A, order) )
					self.assertVecEqual( A_py, A_rand, TOL, TOL )

				# set_all
				val = 2
				A_rand *= 0
				A_rand += val
				self.assertCall( lib.matrix_set_all(A, val) )
				self.assertCall( lib.matrix_memcpy_am(A_ptr, A, order) )
				self.assertVecEqual( A_py, A_rand, TOL, TOL )

				self.free_vars('A', 'Z')
				self.assertCall( lib.ok_device_reset() )

	def test_slicing(self):
		"""Matrix slicing: submatrix, row, column, and diagonal views."""
		(m, n) = self.shape
		A_rand = self.A_test

		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			DIGITS = 7 - 2 * lib.FLOAT - 1 * lib.GPU
			TOL = 10**(-DIGITS)

			for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
				pyorder = 'C' if order == lib.enums.CblasRowMajor else 'F'

				A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')

				# set A, A_py to A_rand
				A_py += A_rand
				self.assertCall( lib.matrix_memcpy_ma(A, A_ptr, order) )

				# submatrix: view of the central (m/2) x (n/2) block.
				# floor division keeps the C calls receiving ints under
				# Python 3 (true division would pass floats)
				m0 = m // 4
				n0 = n // 4
				msub = m // 2
				nsub = n // 2
				Asub = lib.matrix(0, 0, 0, None, order)
				Asub_py = np.zeros(
						(msub, nsub), order=pyorder).astype(lib.pyfloat)
				Asub_ptr = Asub_py.ctypes.data_as(lib.ok_float_p)

				self.assertCall( lib.matrix_submatrix(Asub, A, m0, n0, msub,
								 nsub) )
				self.assertCall( lib.matrix_memcpy_am(Asub_ptr, Asub, order) )
				A_py_sub = A_py[m0 : m0+msub, n0 : n0+nsub]
				self.assertVecEqual( Asub_py, A_py_sub, TOL, TOL )

				# row
				v = lib.vector(0, 0, None)
				v_py = np.zeros(n).astype(lib.pyfloat)
				v_ptr = v_py.ctypes.data_as(lib.ok_float_p)
				self.assertCall( lib.matrix_row(v, A, m0) )
				self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
				self.assertVecEqual( A_py[m0, :], v_py, TOL, TOL )

				# column
				v_py = np.zeros(m).astype(lib.pyfloat)
				v_ptr = v_py.ctypes.data_as(lib.ok_float_p)
				self.assertCall( lib.matrix_column(v, A, n0) )
				self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
				self.assertVecEqual( A_py[: , n0], v_py, TOL, TOL )

				# diagonal
				v_py = np.zeros(min(m, n)).astype(lib.pyfloat)
				v_ptr = v_py.ctypes.data_as(lib.ok_float_p)
				self.assertCall( lib.matrix_diagonal(v, A) )
				self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
				self.assertVecEqual( np.diag(A_py), v_py, TOL, TOL )

				self.free_var('A')
				self.assertCall( lib.ok_device_reset() )

	def test_math(self):
		"""Elementwise matrix math: scale, scale_left/right, abs, pow.

		The expected result is maintained in A_rand by mirroring each C
		call with the equivalent numpy operation.
		"""
		(m, n) = self.shape
		# private copy: the expected values are accumulated in-place here,
		# and self.A_test is shared by the other tests
		A_rand = self.A_test.copy()

		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			DIGITS = 7 - 2 * lib.FLOAT - 1 * lib.GPU
			RTOL = 10**(-DIGITS)
			ATOLMN = RTOL * (m * n)**0.5

			for order in (lib.enums.CblasRowMajor, lib.enums.CblasColMajor):
				A, A_py, A_ptr = self.register_matrix(lib, m, n, order, 'A')

				# set A, A_py to A_rand
				A_py += A_rand
				self.assertCall( lib.matrix_memcpy_ma(A, A_ptr, order) )

				# scale: A = alpha * A
				alpha = np.random.rand()
				A_rand *= alpha
				self.assertCall( lib.matrix_scale(A, alpha) )
				self.assertCall( lib.matrix_memcpy_am(A_ptr, A, order) )
				self.assertVecEqual( A_py, A_rand, ATOLMN, RTOL )

				# scale_left: A = diag(d) * A
				d, d_py, d_ptr = self.register_vector(lib, m, 'd')
				d_py[:] = np.random.rand(m)
				for i in range(m):
					A_rand[i, :] *= d_py[i]
				self.assertCall( lib.vector_memcpy_va(d, d_ptr, 1) )
				self.assertCall( lib.matrix_scale_left(A, d) )
				self.assertCall( lib.matrix_memcpy_am(A_ptr, A, order) )
				self.assertVecEqual( A_py, A_rand, ATOLMN, RTOL )

				# scale_right: A = A * diag(e)
				e, e_py, e_ptr = self.register_vector(lib, n, 'e')
				e_py[:] = np.random.rand(n)
				for j in range(n):
					A_rand[:, j] *= e_py[j]
				self.assertCall( lib.vector_memcpy_va(e, e_ptr, 1) )
				self.assertCall( lib.matrix_scale_right(A, e) )
				self.assertCall( lib.matrix_memcpy_am(A_ptr, A, order) )
				self.assertVecEqual( A_py, A_rand, ATOLMN, RTOL )

				# abs: A_ij = abs(A_ij)
				# shift so roughly half the entries are negative first
				A_rand -= (A_rand.max() - A_rand.min()) / 2
				A_py *= 0
				A_py += A_rand
				A_rand = np.abs(A_rand)
				self.assertCall( lib.matrix_memcpy_ma(A, A_ptr, order) )
				self.assertCall( lib.matrix_abs(A) )
				self.assertCall( lib.matrix_memcpy_am(A_ptr, A, order) )
				self.assertVecEqual( A_py, A_rand, ATOLMN, RTOL )

				# pow (entries are non-negative after abs, so a fractional
				# exponent is safe)
				p = 3 * np.random.rand()
				A_rand **= p
				self.assertCall( lib.matrix_pow(A, p) )
				self.assertCall( lib.matrix_memcpy_am(A_ptr, A, order) )
				self.assertVecEqual( A_py, A_rand, ATOLMN, RTOL )

				self.free_vars('d', 'e', 'A')
				self.assertCall( lib.ok_device_reset() )
0
class VectorTestCase(OptkitCTestCase):
	@classmethod
	def setUpClass(cls):
		"""Force use of locally built libraries for the whole test class.

		Saves the prior value of OPTKIT_USE_LOCALLIBS so tearDownClass
		can restore it, then loads the dense linear-system libraries.
		"""
		# conventional name for the classmethod receiver is `cls`, not `self`
		cls.env_orig = os.getenv('OPTKIT_USE_LOCALLIBS', '0')
		os.environ['OPTKIT_USE_LOCALLIBS'] = '1'
		cls.libs = DenseLinsysLibs()

	@classmethod
	def tearDownClass(cls):
		"""Restore the OPTKIT_USE_LOCALLIBS value saved in setUpClass."""
		# conventional name for the classmethod receiver is `cls`, not `self`
		os.environ['OPTKIT_USE_LOCALLIBS'] = cls.env_orig

	def tearDown(self):
		# Free any C-side variables registered via register_var/register_vector
		# during the test, then run registered exit handlers (device reset).
		self.free_all_vars()
		self.exit_call()

	def test_alloc(self):
		"""Test vector_calloc / vector_free size, stride, and zero-fill."""
		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			len_v = 10 + int(10 * np.random.rand())

			# a freshly constructed vector struct reports zero size/stride
			v = lib.vector(0, 0, None)
			self.assertEqual( v.size, 0 )
			self.assertEqual( v.stride, 0 )

			self.assertCall( lib.vector_calloc(v, len_v) )
			self.register_var('v', v, lib.vector_free)

			self.assertEqual( v.size, len_v )
			self.assertEqual( v.stride, 1 )

			# calloc'd memory must be zeroed; data is only directly
			# addressable from Python on the CPU backend
			if not gpu:
				# range (not Py2-only xrange) so the test runs on Python 3 too
				for i in range(v.size):
					self.assertEqual( v.data[i], 0 )

			# freeing resets the struct's bookkeeping fields
			self.free_var('v')
			self.assertEqual( v.size, 0 )
			self.assertEqual( v.stride, 0 )

	def test_io(self):
		"""Test vector set_all and host<->device copy routines."""
		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			len_v = 10 + int(1000 * np.random.rand())
			# fewer reliable digits in single precision
			DIGITS = 7 - 2 * single_precision
			RTOL = 10**(-DIGITS)
			ATOL = RTOL * len_v**0.5

			v, v_py, v_ptr = self.register_vector(lib, len_v, 'v')
			w, w_py, w_ptr = self.register_vector(lib, len_v, 'w')

			# set_all: every element takes the scalar value
			set_val = 5
			self.assertCall( lib.vector_set_all(v, set_val) )

			# memcpy_av: device vector -> host array
			self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
			for i in range(len_v):
				self.assertEqual(v_py[i], set_val)

			# memcpy_va: host array -> device vector; round-trip and compare
			w_rand = np.random.rand(len_v)
			w_py[:] = w_rand[:]
			self.assertCall( lib.vector_memcpy_va(w, w_ptr, 1) )
			w_py *= 0
			self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
			self.assertVecEqual( w_py, w_rand, ATOL, RTOL )

			# memcpy_vv: device vector -> device vector
			self.assertCall( lib.vector_memcpy_vv(v, w) )
			self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
			self.assertVecEqual( v_py, w_rand, ATOL, RTOL )

			# view_array: wrap Python-owned memory (CPU backend only)
			if not gpu:
				u_rand = np.random.rand(len_v).astype(lib.pyfloat)
				u_ptr = u_rand.ctypes.data_as(lib.ok_float_p)
				u = lib.vector(0, 0, None)
				self.assertCall( lib.vector_view_array(u, u_ptr,
													   u_rand.size) )
				self.assertCall( lib.vector_memcpy_av(v_ptr, u, 1) )
				# original line mixed tabs and spaces here (TabError on Py3)
				self.assertVecEqual( v_py, u_rand, ATOL, RTOL )

				# DON'T FREE u, DATA OWNED BY PYTHON

			self.free_vars('v', 'w')
			self.assertCall( lib.ok_device_reset() )

	def test_subvector(self):
		"""Test that a subvector view aliases the parent vector's data."""
		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			len_v = 10 + int(10 * np.random.rand())

			v, v_py, v_ptr = self.register_vector(lib, len_v, 'v')

			offset_sub = 3
			len_sub = 3
			v_sub = lib.vector(0, 0, None)
			self.assertCall( lib.vector_subvector(v_sub, v, offset_sub,
												  len_sub) )
			# compare against len_sub rather than a repeated literal
			self.assertEqual( v_sub.size, len_sub )
			self.assertEqual( v_sub.stride, v.stride )
			v_sub_py = np.zeros(len_sub).astype(lib.pyfloat)
			v_sub_ptr = v_sub_py.ctypes.data_as(lib.ok_float_p)
			# NOTE(review): v is calloc'd and never written, so both sides
			# compare equal as zeros; seeding v with random data first would
			# make this a stronger aliasing check.
			self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
			self.assertCall( lib.vector_memcpy_av(v_sub_ptr, v_sub, 1) )
			for i in range(len_sub):
				self.assertEqual( v_sub_py[i], v_py[i + offset_sub] )

			self.free_var('v')
			self.assertCall( lib.ok_device_reset() )

	def test_math(self):
		"""Exercise elementwise vector math against scalar tracking values.

		v and w hold uniform values throughout; the Python scalars val1
		and val2 track the expected contents of v and w respectively.
		After each library call the device vector is copied back to the
		host and compared against the tracked scalar. Statement order is
		load-bearing: each step's expected value derives from the last.
		"""
		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			val1 = 12 * np.random.rand()
			val2 = 5 * np.random.rand()
			len_v = 10 + int(1000 * np.random.rand())

			# tolerance shrinks with single precision and on GPU
			DIGITS = 7 - 2 * lib.FLOAT - 1 * lib.GPU
			RTOL = 10**(-DIGITS)
			ATOL = RTOL * len_v**0.5

			v, v_py, v_ptr = self.register_vector(lib, len_v, 'v')
			w, w_py, w_ptr = self.register_vector(lib, len_v, 'w')

			# constant addition: v = val1, w = val2 (vectors start at zero)
			self.assertCall( lib.vector_add_constant(v, val1) )
			self.assertCall( lib.vector_add_constant(w, val2) )
			self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
			self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
			self.assertVecEqual( v_py, val1, ATOL, RTOL )
			self.assertVecEqual( w_py, val2, ATOL, RTOL )

			# add two vectors: v += w
			self.assertCall( lib.vector_add(v, w) )
			val1 += val2
			self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
			self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
			self.assertVecEqual( v_py, val1, ATOL, RTOL )
			self.assertVecEqual( w_py, val2, ATOL, RTOL )

			# subtract two vectors: w -= v
			self.assertCall( lib.vector_sub(w, v) )
			val2 -= val1
			self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
			self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
			self.assertVecEqual( v_py, val1, ATOL, RTOL )
			self.assertVecEqual( w_py, val2, ATOL, RTOL )

			# multiply two vectors elementwise: v *= w
			self.assertCall( lib.vector_mul(v, w) )
			val1 *= val2
			self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
			self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
			self.assertVecEqual( v_py, val1, ATOL, RTOL )
			self.assertVecEqual( w_py, val2, ATOL, RTOL )

			# vector scale: v *= scal
			scal = 3 * np.random.rand()
			val1 *= scal
			self.assertCall( lib.vector_scale(v, scal) )
			self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
			self.assertVecEqual( v_py, val1, ATOL, RTOL )

			# make sure v is strictly positive (>= 0.7) before dividing by it
			val1 = 0.7 + np.random.rand()
			self.assertCall( lib.vector_scale(v, 0) )
			self.assertCall( lib.vector_add_constant(v, val1) )

			# divide two vectors elementwise: w /= v
			self.assertCall( lib.vector_div(w, v) )
			val2 /= float(val1)
			self.assertCall( lib.vector_memcpy_av(v_ptr, v, 1) )
			self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
			self.assertVecEqual( v_py, val1, ATOL, RTOL )
			self.assertVecEqual( w_py, val2, ATOL, RTOL )

			# make w strictly negative so abs() is a nontrivial operation
			w_max = w_py.max()
			val2 -= (w_max + 1)
			self.assertCall( lib.vector_add_constant(w, -(w_max + 1)) )

			# vector abs: w = |w|
			self.assertCall( lib.vector_abs(w) )
			val2 = abs(val2)
			self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
			self.assertVecEqual( w_py, val2, ATOL, RTOL )

			# vector recip: w = 1 / w
			self.assertCall( lib.vector_recip(w) )
			val2 = 1. / val2
			self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
			self.assertVecEqual( w_py, val2, ATOL, RTOL )

			# vector sqrt: w = w ** 0.5
			self.assertCall( lib.vector_sqrt(w) )
			val2 **= 0.5
			self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
			self.assertVecEqual( w_py, val2, ATOL, RTOL )

			# vector pow: w = w ** pow_val, exponent drawn from [-2, 2)
			pow_val = -2 + 4 * np.random.rand()
			self.assertCall( lib.vector_pow(w, pow_val) )
			val2 **= pow_val
			self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
			self.assertVecEqual( w_py, val2, ATOL, RTOL )

			# vector exp: w = e ** w
			self.assertCall( lib.vector_exp(w) )
			val2 = np.exp(val2)
			self.assertCall( lib.vector_memcpy_av(w_ptr, w, 1) )
			self.assertVecEqual( val2, w_py, ATOL, RTOL )

			# refill w with non-uniform random data for the reductions below
			w_py *= 0
			w_py += np.random.rand(len(w_py))
			self.assertCall( lib.vector_memcpy_va(w, w_ptr, 1) )

			# vector argmin: index of the minimum element
			wargmin = np.zeros(1).astype(c_size_t)
			wargmin_p = wargmin.ctypes.data_as(lib.c_size_t_p)
			self.assertCall( lib.vector_indmin(w, wargmin_p) )
			self.assertScalarEqual( w_py[wargmin[0]], w_py.min(), RTOL )

			# vector min
			wmin = np.zeros(1).astype(lib.pyfloat)
			wmin_p = wmin.ctypes.data_as(lib.ok_float_p)
			self.assertCall( lib.vector_min(w, wmin_p) )
			self.assertScalarEqual( wmin[0], w_py.min(), RTOL )

			# vector max (reuses the min output buffer)
			wmax = wmin
			wmax_p = wmin_p
			self.assertCall( lib.vector_max(w, wmax_p) )
			self.assertScalarEqual( wmax[0], w_py.max(), RTOL )

			self.free_vars('v', 'w')
			self.assertCall( lib.ok_device_reset() )

	def test_indvector_math(self):
		"""Test indmin / min / max reductions on index (size_t) vectors."""
		for (gpu, single_precision) in self.CONDITIONS:
			lib = self.libs.get(single_precision=single_precision, gpu=gpu)
			if lib is None:
				continue
			self.register_exit(lib.ok_device_reset)

			# (removed unused val1/val2 locals: this test only reduces w)
			len_v = 10 + int(1000 * np.random.rand())

			# tolerance shrinks with single precision and on GPU
			DIGITS = 7 - 2 * lib.FLOAT - 1 * lib.GPU
			RTOL = 10**(-DIGITS)

			w, w_py, w_ptr = self.register_indvector(lib, len_v, 'w')

			# fill with random integer values in [0, 30) and push to device
			w_py *= 0
			w_py += (30 * np.random.rand(len(w_py))).astype(w_py.dtype)
			self.assertCall( lib.indvector_memcpy_va(w, w_ptr, 1) )

			# vector argmin: index of the minimum element
			wargmin = np.zeros(1).astype(c_size_t)
			wargmin_p = wargmin.ctypes.data_as(lib.c_size_t_p)
			self.assertCall( lib.indvector_indmin(w, wargmin_p) )
			self.assertScalarEqual( w_py[wargmin[0]], w_py.min(), RTOL )

			# vector min (reuses the argmin output buffer)
			wmin = wargmin
			wmin_p = wargmin_p
			self.assertCall( lib.indvector_min(w, wmin_p) )
			self.assertScalarEqual( wmin[0], w_py.min(), RTOL )

			# vector max (reuses the same output buffer)
			wmax = wmin
			wmax_p = wmin_p
			self.assertCall( lib.indvector_max(w, wmax_p) )
			self.assertScalarEqual( wmax[0], w_py.max(), RTOL )

			self.free_var('w')
			self.assertCall( lib.ok_device_reset() )