def setUpClass(cls):
    """One-time fixture: CUDA context plus a shared cublasXt handle.

    Seeds numpy so the random test matrices are reproducible, then
    binds the handle to a single device (device 0).
    """
    np.random.seed(0)
    # A CUDA context must exist before any cublasXt calls are made.
    cls.ctx = make_default_context()
    cls.handle = cublasxt.cublasXtCreate()
    # Restrict the handle to one GPU: device id 0.
    cls.nbDevices = 1
    cls.deviceId = np.array([0], np.int32)
    cublasxt.cublasXtDeviceSelect(cls.handle, cls.nbDevices, cls.deviceId)
def setUp(self):
    """Per-test fixture: fresh cublasXt handle bound to device 0."""
    np.random.seed(0)  # deterministic random inputs for every test
    self.handle = cublasxt.cublasXtCreate()
    # Single-GPU configuration: the handle works on device 0 only.
    self.nbDevices = 1
    self.deviceId = np.array([0], np.int32)
    cublasxt.cublasXtDeviceSelect(self.handle, self.nbDevices,
                                  self.deviceId)
def test_cublasXtZgemm(self):
    """Check cublasXtZgemm (double-complex GEMM) against numpy.

    Computes C = 1.0*A*B + 0.0*C on 4x4 matrices and compares with
    np.dot.
    """
    # Zgemm operates on double-precision complex values, i.e. numpy
    # complex128. (The original used complex256, which is extended
    # precision, unsupported by cuBLAS, and absent on many platforms.)
    a = (np.random.rand(4, 4) +
         1j*np.random.rand(4, 4)).astype(np.complex128)
    b = (np.random.rand(4, 4) +
         1j*np.random.rand(4, 4)).astype(np.complex128)
    c = np.zeros((4, 4), np.complex128)
    # NOTE(review): a stray cublasXtDeviceSelect(handle, 2, ...) call
    # was removed here -- `handle` was an undefined name (NameError)
    # and setUp already selects the device set for self.handle.
    cublasxt.cublasXtZgemm(self.handle, cublasxt._CUBLAS_OP['N'],
                           cublasxt._CUBLAS_OP['N'], 4, 4, 4,
                           np.complex128(1.0), a.ctypes.data, 4,
                           b.ctypes.data, 4, np.complex128(0.0),
                           c.ctypes.data, 4)
    # cuBLAS is column-major, so in row-major numpy terms the call
    # computed (b.T . a.T).T. Assert the comparison -- a bare
    # np.allclose(...) discards its result and can never fail.
    assert np.allclose(np.dot(b.T, a.T).T, c)
def test_cublasXtDgemm(self):
    """Check cublasXtDgemm (double-precision GEMM) against numpy.

    Computes C = 1.0*A*B + 0.0*C on 4x4 matrices and compares with
    np.dot.
    """
    a = np.random.rand(4, 4).astype(np.float64)
    b = np.random.rand(4, 4).astype(np.float64)
    c = np.zeros((4, 4), np.float64)
    # NOTE(review): a stray cublasXtDeviceSelect(handle, 2, ...) call
    # was removed here -- `handle` was an undefined name (NameError)
    # and setUp already selects the device set for self.handle.
    cublasxt.cublasXtDgemm(self.handle, cublasxt._CUBLAS_OP['N'],
                           cublasxt._CUBLAS_OP['N'], 4, 4, 4,
                           np.float64(1.0), a.ctypes.data, 4,
                           b.ctypes.data, 4, np.float64(0.0),
                           c.ctypes.data, 4)
    # cuBLAS is column-major, so in row-major numpy terms the call
    # computed (b.T . a.T).T. Assert the comparison -- a bare
    # np.allclose(...) discards its result and can never fail.
    assert np.allclose(np.dot(b.T, a.T).T, c)
def test_cublasXtZgemm(self):
    """Check cublasXtZgemm (double-complex GEMM) against numpy.

    Computes C = 1.0*A*B + 0.0*C on 4x4 complex matrices and compares
    with np.dot.
    """
    # Zgemm takes double-precision complex operands -> numpy
    # complex128. (complex256 is extended precision, is not a cuBLAS
    # type, and does not exist on many platforms.)
    a = (np.random.rand(4, 4) +
         1j * np.random.rand(4, 4)).astype(np.complex128)
    b = (np.random.rand(4, 4) +
         1j * np.random.rand(4, 4)).astype(np.complex128)
    c = np.zeros((4, 4), np.complex128)
    # NOTE(review): removed a leftover cublasXtDeviceSelect(handle, 2,
    # ...) call -- `handle` was undefined (NameError) and device
    # selection for self.handle already happens in setUp.
    cublasxt.cublasXtZgemm(self.handle, cublasxt._CUBLAS_OP['N'],
                           cublasxt._CUBLAS_OP['N'], 4, 4, 4,
                           np.complex128(1.0), a.ctypes.data, 4,
                           b.ctypes.data, 4, np.complex128(0.0),
                           c.ctypes.data, 4)
    # Column-major cuBLAS computed (b.T . a.T).T in row-major terms;
    # assert it -- an unasserted np.allclose is a no-op.
    assert np.allclose(np.dot(b.T, a.T).T, c)
# Python 2 script preamble: uses print statements and relies on '/'
# being integer floor division below.
import os
import scipy
from data_iter import DataIter
from mixmodule import create_net, mixModule

# NOTE(review): `mx`, `np`, and `cublasxt` are used below but not
# imported in this chunk -- presumably imported earlier in the file;
# verify.
print 'mxnet version' + mx.__version__

# Train on a single GPU; the multi-GPU context list is left commented
# out.
# ctx = [mx.gpu(i) for i in range(3)]
ctx = [mx.gpu(0)]

# Create a cublasXt handle and configure it: enable pinned-memory mode
# and route a share of the work to the CPU.
handle = cublasxt.cublasXtCreate()
# mode = cublasxt.cublasXtGetPinningMemMode(handle)
cublasxt.cublasXtSetPinningMemMode(handle, 1)
# assumes (0, 0, 0.9) means 90% ratio for the first BLAS routine/type
# pair -- TODO confirm argument meaning against the cublasXt docs.
cublasxt.cublasXtSetCpuRatio(handle, 0, 0, 0.9)
# Use every device in ctx for cublasXt.
nbDevices = len(ctx)
deviceId = np.array(range(nbDevices), np.int32)
cublasxt.cublasXtDeviceSelect(handle, nbDevices, deviceId)

# Training hyperparameters.
num_epoch = 1000000
batch_size = 64 * nbDevices
show_period = 1000

assert (batch_size % nbDevices == 0)
# Python 2 integer division; the assert above guarantees it is exact.
bsz_per_device = batch_size / nbDevices
print 'batch_size per device:', bsz_per_device

# Feature dimensionality and proxy table size; data_shape is
# (batch, channels, height, width).
# featdim = 128
featdim = 512
total_proxy_num = 285000
data_shape = (batch_size, 3, 240, 120)
# proxy_Z_shape = (featdim, total_proxy_num)
proxy_Z_fn = './proxy_Z.npy'
def setUp(self):
    """Per-test fixture: cublasXt handle restricted to device 0.

    Passes the device-id array's raw buffer (ctypes.data) to the
    wrapper, as the original did.
    """
    self.handle = cublasxt.cublasXtCreate()
    # One device only; the id array's underlying pointer is handed to
    # the C API.
    self.nbDevices = 1
    self.deviceId = np.array([0], np.int32)
    cublasxt.cublasXtDeviceSelect(self.handle, self.nbDevices,
                                  self.deviceId.ctypes.data)