def test_all_gather(self):
    """Test GpuComm.all_gather for 1-D input and for C-/F-ordered 2-D
    input at nd_up in {0, 1, 3}, plus the error case nd_up < -1.
    """
    # Each rank contributes 10 consecutive ints; the gathered result is
    # the full range across all ranks.
    texp = np.arange(self.size * 10, dtype='int32')
    cpu = np.arange(self.rank * 10, self.rank * 10 + 10, dtype='int32')

    # 1-D input, nd_up=0: plain concatenation across ranks.
    a = cpu
    gpu = gpuarray.asarray(a, context=self.ctx)
    resgpu = self.gpucomm.all_gather(gpu, nd_up=0)
    check_all(resgpu, texp)

    # C-ordered 2-D input, nd_up=0: concatenation along the first axis.
    a = cpu.reshape((2, 5), order='C')
    exp = texp.reshape((2 * self.size, 5), order='C')
    gpu = gpuarray.asarray(a, context=self.ctx)
    resgpu = self.gpucomm.all_gather(gpu, nd_up=0)
    check_all(resgpu, exp)

    # C-ordered, nd_up=1: one new leading axis of length `size`.
    a = cpu.reshape((2, 5), order='C')
    exp = texp.reshape((self.size, 2, 5), order='C')
    gpu = gpuarray.asarray(a, context=self.ctx)
    resgpu = self.gpucomm.all_gather(gpu, nd_up=1)
    check_all(resgpu, exp)

    # C-ordered, nd_up=3: extra leading axes are padded with 1s.
    a = cpu.reshape((2, 5), order='C')
    exp = texp.reshape((self.size, 1, 1, 2, 5), order='C')
    gpu = gpuarray.asarray(a, context=self.ctx)
    resgpu = self.gpucomm.all_gather(gpu, nd_up=3)
    check_all(resgpu, exp)

    # F-ordered input, nd_up=0: pass order='F' so the device array is
    # actually F-contiguous (without it, asarray is free to produce a
    # C-ordered copy, and the F-ordered expectations below would not
    # correspond to the device memory layout; the sibling copy of this
    # test passes order='F' here).
    a = cpu.reshape((5, 2), order='F')
    exp = texp.reshape((5, 2 * self.size), order='F')
    gpu = gpuarray.asarray(a, context=self.ctx, order='F')
    resgpu = self.gpucomm.all_gather(gpu, nd_up=0)
    check_all(resgpu, exp)

    # F-ordered, nd_up=1: one new trailing axis of length `size`.
    a = cpu.reshape((5, 2), order='F')
    exp = texp.reshape((5, 2, self.size), order='F')
    gpu = gpuarray.asarray(a, context=self.ctx, order='F')
    resgpu = self.gpucomm.all_gather(gpu, nd_up=1)
    check_all(resgpu, exp)

    # F-ordered, nd_up=3: extra trailing axes are padded with 1s.
    a = cpu.reshape((5, 2), order='F')
    exp = texp.reshape((5, 2, 1, 1, self.size), order='F')
    gpu = gpuarray.asarray(a, context=self.ctx, order='F')
    resgpu = self.gpucomm.all_gather(gpu, nd_up=3)
    check_all(resgpu, exp)

    # nd_up below -1 is invalid.
    with self.assertRaises(Exception):
        resgpu = self.gpucomm.all_gather(gpu, nd_up=-2)
def test_all_gather(self):
    """Gather each rank's chunk into the full range: 1-D input, then
    C-/F-ordered 2-D input at nd_up in {0, 1, 3}; finally verify that
    nd_up < -1 raises.
    """
    texp = np.arange(self.size * 10, dtype='int32')
    cpu = np.arange(self.rank * 10, self.rank * 10 + 10, dtype='int32')

    # 1-D input: simple concatenation across ranks.
    gpu = gpuarray.asarray(cpu, context=self.ctx)
    check_all(self.gpucomm.all_gather(gpu, nd_up=0), texp)

    # C-ordered 2-D input: the result grows along the leading axes,
    # with 1-sized padding when nd_up exceeds 1.
    for nd_up, res_shape in [(0, (2 * self.size, 5)),
                             (1, (self.size, 2, 5)),
                             (3, (self.size, 1, 1, 2, 5))]:
        src = cpu.reshape((2, 5), order='C')
        want = texp.reshape(res_shape, order='C')
        gpu = gpuarray.asarray(src, context=self.ctx)
        check_all(self.gpucomm.all_gather(gpu, nd_up=nd_up), want)

    # F-ordered 2-D input: the result grows along the trailing axes.
    # order='F' keeps the device array F-contiguous.
    for nd_up, res_shape in [(0, (5, 2 * self.size)),
                             (1, (5, 2, self.size)),
                             (3, (5, 2, 1, 1, self.size))]:
        src = cpu.reshape((5, 2), order='F')
        want = texp.reshape(res_shape, order='F')
        gpu = gpuarray.asarray(src, context=self.ctx, order='F')
        check_all(self.gpucomm.all_gather(gpu, nd_up=nd_up), want)

    # nd_up below -1 is rejected.
    with self.assertRaises(Exception):
        self.gpucomm.all_gather(gpu, nd_up=-2)
def test_reduce_scatter(self):
    """Test GpuComm.reduce_scatter with 'sum': explicit result buffers,
    inferred C-/F-ordered results (one less dim, or same dim with a
    smaller split axis), and the error raised when the input cannot be
    split evenly across ranks.
    """
    # Summing (arange + rank) over all ranks gives
    # size * arange + sum(range(size)); each rank keeps its own slice.
    texp = self.size * np.arange(5 * self.size) + sum(range(self.size))
    exp = texp[self.rank * 5:self.rank * 5 + 5]

    # order c, result buffer supplied.
    # BUG FIX: np.reshape returns a new array and does not mutate its
    # argument; the original code discarded the result everywhere below,
    # leaving the inputs 1-D.  Bind the reshaped arrays.
    cpu = np.arange(5 * self.size) + self.rank
    cpu = np.reshape(cpu, (self.size, 5), order='C')
    gpu = gpuarray.asarray(cpu, context=self.ctx)
    resgpu = gpuarray.empty((5,), dtype='int64', order='C',
                            context=self.ctx)
    self.gpucomm.reduce_scatter(gpu, 'sum', resgpu)
    assert np.allclose(resgpu, exp)

    # order f, result buffer supplied
    cpu = np.arange(5 * self.size) + self.rank
    cpu = np.reshape(cpu, (5, self.size), order='F')
    gpu = gpuarray.asarray(cpu, context=self.ctx)
    resgpu = gpuarray.empty((5,), dtype='int64', order='F',
                            context=self.ctx)
    self.gpucomm.reduce_scatter(gpu, 'sum', resgpu)
    assert np.allclose(resgpu, exp)

    # make result order c (one less dim): (size, 5) -> (5,)
    cpu = np.arange(5 * self.size) + self.rank
    cpu = np.reshape(cpu, (self.size, 5), order='C')
    gpu = gpuarray.asarray(cpu, context=self.ctx)
    resgpu = self.gpucomm.reduce_scatter(gpu, 'sum')
    check_all(resgpu, exp)
    assert resgpu.flags['C_CONTIGUOUS'] is True

    # c-contiguous split problem (for size == 1, it can always be split)
    if self.size != 1:
        cpu = np.arange(5 * (self.size + 1), dtype='int32') + self.rank
        cpu = np.reshape(cpu, (self.size + 1, 5), order='C')
        gpu = gpuarray.asarray(cpu, context=self.ctx)
        with self.assertRaises(TypeError):
            resgpu = self.gpucomm.reduce_scatter(gpu, 'sum')

    # make result order f (one less dim): (5, size) -> (5,)
    cpu = np.arange(5 * self.size) + self.rank
    cpu = np.reshape(cpu, (5, self.size), order='F')
    gpu = gpuarray.asarray(cpu, context=self.ctx)
    resgpu = self.gpucomm.reduce_scatter(gpu, 'sum')
    check_all(resgpu, exp)
    assert resgpu.flags['F_CONTIGUOUS'] is True

    # f-contiguous split problem (for size == 1, it can always be split)
    if self.size != 1:
        cpu = np.arange(5 * (self.size + 1), dtype='int32') + self.rank
        cpu = np.reshape(cpu, (5, self.size + 1), order='F')
        gpu = gpuarray.asarray(cpu, context=self.ctx)
        with self.assertRaises(TypeError):
            resgpu = self.gpucomm.reduce_scatter(gpu, 'sum')

    # make result order c (same dim - less size): (3*size, 5) -> (3, 5)
    texp = self.size * np.arange(5 * self.size * 3) + sum(range(self.size))
    exp = texp[self.rank * 15:self.rank * 15 + 15]
    exp = np.reshape(exp, (3, 5), order='C')
    cpu = np.arange(5 * self.size * 3) + self.rank
    cpu = np.reshape(cpu, (self.size * 3, 5), order='C')
    gpu = gpuarray.asarray(cpu, context=self.ctx)
    resgpu = self.gpucomm.reduce_scatter(gpu, 'sum')
    check_all(resgpu, exp)
    assert resgpu.flags['C_CONTIGUOUS'] is True

    # make result order f (same dim - less size): (5, 3*size) -> (5, 3)
    texp = self.size * np.arange(5 * self.size * 3) + sum(range(self.size))
    exp = texp[self.rank * 15:self.rank * 15 + 15]
    exp = np.reshape(exp, (5, 3), order='F')
    cpu = np.arange(5 * self.size * 3) + self.rank
    cpu = np.reshape(cpu, (5, self.size * 3), order='F')
    gpu = gpuarray.asarray(cpu, context=self.ctx)
    resgpu = self.gpucomm.reduce_scatter(gpu, 'sum')
    check_all(resgpu, exp)
    assert resgpu.flags['F_CONTIGUOUS'] is True
def test_reduce_scatter(self):
    """Test GpuComm.reduce_scatter with 'sum': explicit result buffers,
    inferred C-/F-ordered results (one less dim, or same dim with a
    smaller split axis), and the error raised when the input cannot be
    split evenly across ranks.
    """
    # Each rank holds (arange + rank); the element-wise sum over ranks
    # is size * arange + sum(range(size)), scattered in rank order.
    texp = self.size * np.arange(5 * self.size) + sum(range(self.size))
    exp = texp[self.rank * 5:self.rank * 5 + 5]

    # order c, result buffer supplied.
    # BUG FIX: np.reshape does not modify its argument in place; the
    # original discarded every reshape result, so all inputs stayed 1-D.
    # The reshaped arrays are now rebound.
    cpu = np.arange(5 * self.size) + self.rank
    cpu = np.reshape(cpu, (self.size, 5), order='C')
    gpu = gpuarray.asarray(cpu, context=self.ctx)
    resgpu = gpuarray.empty((5,), dtype='int64', order='C',
                            context=self.ctx)
    self.gpucomm.reduce_scatter(gpu, 'sum', resgpu)
    assert np.allclose(resgpu, exp)

    # order f, result buffer supplied
    cpu = np.arange(5 * self.size) + self.rank
    cpu = np.reshape(cpu, (5, self.size), order='F')
    gpu = gpuarray.asarray(cpu, context=self.ctx)
    resgpu = gpuarray.empty((5,), dtype='int64', order='F',
                            context=self.ctx)
    self.gpucomm.reduce_scatter(gpu, 'sum', resgpu)
    assert np.allclose(resgpu, exp)

    # make result order c (one less dim): (size, 5) -> (5,)
    cpu = np.arange(5 * self.size) + self.rank
    cpu = np.reshape(cpu, (self.size, 5), order='C')
    gpu = gpuarray.asarray(cpu, context=self.ctx)
    resgpu = self.gpucomm.reduce_scatter(gpu, 'sum')
    check_all(resgpu, exp)
    assert resgpu.flags['C_CONTIGUOUS'] is True

    # c-contiguous split problem (for size == 1, it can always be split)
    if self.size != 1:
        cpu = np.arange(5 * (self.size + 1), dtype='int32') + self.rank
        cpu = np.reshape(cpu, (self.size + 1, 5), order='C')
        gpu = gpuarray.asarray(cpu, context=self.ctx)
        with self.assertRaises(TypeError):
            resgpu = self.gpucomm.reduce_scatter(gpu, 'sum')

    # make result order f (one less dim): (5, size) -> (5,)
    cpu = np.arange(5 * self.size) + self.rank
    cpu = np.reshape(cpu, (5, self.size), order='F')
    gpu = gpuarray.asarray(cpu, context=self.ctx)
    resgpu = self.gpucomm.reduce_scatter(gpu, 'sum')
    check_all(resgpu, exp)
    assert resgpu.flags['F_CONTIGUOUS'] is True

    # f-contiguous split problem (for size == 1, it can always be split)
    if self.size != 1:
        cpu = np.arange(5 * (self.size + 1), dtype='int32') + self.rank
        cpu = np.reshape(cpu, (5, self.size + 1), order='F')
        gpu = gpuarray.asarray(cpu, context=self.ctx)
        with self.assertRaises(TypeError):
            resgpu = self.gpucomm.reduce_scatter(gpu, 'sum')

    # make result order c (same dim - less size): (3*size, 5) -> (3, 5)
    texp = self.size * np.arange(5 * self.size * 3) + sum(range(self.size))
    exp = texp[self.rank * 15:self.rank * 15 + 15]
    exp = np.reshape(exp, (3, 5), order='C')
    cpu = np.arange(5 * self.size * 3) + self.rank
    cpu = np.reshape(cpu, (self.size * 3, 5), order='C')
    gpu = gpuarray.asarray(cpu, context=self.ctx)
    resgpu = self.gpucomm.reduce_scatter(gpu, 'sum')
    check_all(resgpu, exp)
    assert resgpu.flags['C_CONTIGUOUS'] is True

    # make result order f (same dim - less size): (5, 3*size) -> (5, 3)
    texp = self.size * np.arange(5 * self.size * 3) + sum(range(self.size))
    exp = texp[self.rank * 15:self.rank * 15 + 15]
    exp = np.reshape(exp, (5, 3), order='F')
    cpu = np.arange(5 * self.size * 3) + self.rank
    cpu = np.reshape(cpu, (5, self.size * 3), order='F')
    gpu = gpuarray.asarray(cpu, context=self.ctx)
    resgpu = self.gpucomm.reduce_scatter(gpu, 'sum')
    check_all(resgpu, exp)
    assert resgpu.flags['F_CONTIGUOUS'] is True