def test_three_elements(self):
    """ Make sure only 3D spaces can be successfully created. """
    self.assertRaises(TypeError, space.initialize_space, (100,))
    self.assertRaises(TypeError, space.initialize_space, (100, 2))
    self.assertRaises(TypeError, space.initialize_space, (100, 2, 3, 4))
    self.assertRaises(TypeError, space.initialize_space, (100, 2, 3, 4, 5))
    space.initialize_space((100, 2, 3))

def test_grid_shape(self):
    """ Make sure the grid shapes of the exec configs are correct. """
    tot_threads = lambda gs, bs: (gs[0] * bs[0], gs[1] * bs[1])
    for case in self.cases:
        space.initialize_space(case['shape'])
        z = Out(case['dtype'])
        fun = Kernel('', ('z', 'out', z.dtype))
        for cfg in fun.exec_configs:
            for ind in range(2):
                self.assertTrue(cfg['grid_shape'][ind] * \
                                cfg['block_shape'][ind] >= \
                                case['shape'][ind+1])
                self.assertTrue((cfg['grid_shape'][ind] - 1) * \
                                cfg['block_shape'][ind] < \
                                case['shape'][ind+1])

        # One padded case.
        fun = Kernel('', ('z', 'out', z.dtype), padding=(1, 2, 3, 4))
        pad = [3, 7]
        for cfg in fun.exec_configs:
            for ind in range(2):
                self.assertTrue(cfg['grid_shape'][ind] * \
                                (cfg['block_shape'][ind] - pad[ind]) >= \
                                case['shape'][ind+1])
                self.assertTrue((cfg['grid_shape'][ind] - 1) * \
                                (cfg['block_shape'][ind] - pad[ind]) < \
                                case['shape'][ind+1])

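# NOTE: The tests in this section reference self.cases and self.valid_dtypes,
# but the fixture itself is not shown here. A minimal sketch of a plausible
# setUp follows; the shape chosen below is an assumption, not the real fixture.
def setUp(self):
    self.valid_dtypes = (np.float32, np.float64, np.complex64, np.complex128)
    # Hypothetical test shape; the real suite may sweep several shapes.
    self.cases = [{'shape': (100, 10, 10), 'dtype': dtype}
                  for dtype in self.valid_dtypes]
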
def test_padded_kernel(self):
    """ Implement a simple padded kernel. """
    for case in self.cases:
        # Form data to work on.
        space.initialize_space(case['shape'])
        x_np = comm.allreduce(np.random.randn(*case['shape']).\
                                astype(case['dtype']))
        x = Grid(x_np, x_overlap=1)
        s_np = comm.allreduce(np.random.randn(1).astype(case['dtype']))
        s = Const(s_np)
        z = Out(case['dtype'])

        # Make a kernel.
        code = Template("""
            if (_in_local && _in_global) {
                x(0,0,0) = s(0) * x(0,0,0);
                z += a * x(0,0,0);
            }
            """).render()
        fun = Kernel(code, \
                     ('a', 'number', case['dtype']), \
                     ('x', 'grid', x.dtype), \
                     ('s', 'const', s.dtype, s.data.size), \
                     ('z', 'out', z.dtype), \
                     padding=(1, 1, 1, 1))

        # Execute and check the result.
        fun(case['dtype'](2), x, s, z)
        gpu_sum = z.get()
        cpu_sum = np.sum(2.0 * s_np * x_np)
        err = abs(gpu_sum - cpu_sum) / abs(cpu_sum)
        if case['dtype'] in (np.float32, np.complex64):
            self.assertTrue(err < 1e-2, (case, err))
        else:
            self.assertTrue(err < 1e-6, (case, err))

def test_batch_sum(self):
    """ Make sure batch summing works. """
    num_outs = 3
    for case in self.cases:
        space.initialize_space(case['shape'])
        x = [Out(case['dtype'], op='sum') for k in range(num_outs)]
        x_cpu_data = [np.random.randn(*case['shape'][1:]).\
                        astype(case['dtype']) for k in range(num_outs)]
        if case['dtype'] in (np.complex64, np.complex128):
            for k in range(num_outs):
                x_cpu_data[k] = (1 + 1j) * x_cpu_data[k]

        res_gold = []
        for k in range(num_outs):
            x[k].data.set(x_cpu_data[k])
            res_gold.append(comm.allreduce(np.sum(x_cpu_data[k].flatten())))

        batch_reduce(*x)
        res_gpu = [x_indiv.get() for x_indiv in x]

        for k in range(num_outs):
            err = abs(res_gold[k] - res_gpu[k]) / abs(res_gold[k])
            if case['dtype'] in (np.float32, np.complex64):
                self.assertTrue(err < 1e-3)
            else:
                self.assertTrue(err < 1e-10)

def test_init(self):
    """ Test initialize function. """
    for case in self.cases:
        # Plain int (formerly np.int, removed in NumPy 1.24) is not a
        # supported dtype.
        untype_array = np.zeros(case['shape']).astype(int)
        space.initialize_space(case['shape'])
        self.assertRaises(TypeError, Const, int)
        self.assertRaises(TypeError, Const, untype_array)
        self.assertRaises(TypeError, Const, 'string')
        Const(np.random.randn(10).astype(case['dtype']))

def test_get_info(self):
    """ Test the get_space_info function. """
    # # We should get an error if we haven't initialized a space yet.
    # self.assertRaises(TypeError, space.get_space_info)
    shape = (100, 2, 3)
    space.initialize_space(shape)
    info = space.get_space_info()
    self.assertEqual(info['shape'], shape)

def test_partition(self):
    """ Make sure the x_ranges span the entire space without any gaps. """
    shapes = ((200, 30, 10), (33, 10, 10), (130, 5, 5), (111, 2, 2))
    for shape in shapes:
        space.initialize_space(shape)
        x = comm.gather(space.get_space_info()['x_range'])
        if comm.Get_rank() == 0:
            self.assertEqual(x[0][0], 0)
            self.assertEqual(x[-1][-1], space.get_space_info()['shape'][0])
            for k in range(len(x) - 1):
                self.assertEqual(x[k][1], x[k + 1][0])

def test_init(self):
    """ Test initialize function. """
    for case in self.cases:
        # Plain int (formerly np.int) is not a supported dtype.
        untype_array = np.zeros(case['shape']).astype(int)
        space.initialize_space(case['shape'])
        self.assertRaises(TypeError, Out, int)
        self.assertRaises(TypeError, Out, untype_array)
        self.assertRaises(TypeError, Out, 'string')
        self.assertRaises(TypeError, Out, np.complex128, op='bad')
        Out(case['dtype'])
        Out(case['dtype'], op='sum')

def test_init(self):
    """ Just make sure we can initialize the kernel. """
    for case in self.cases:
        # Form data to work on.
        space.initialize_space(case['shape'])
        x_np = np.random.randn(*case['shape']).astype(case['dtype'])
        x = Grid(x_np)
        fun = Kernel('', ('x', 'grid', x.dtype))
        fun = Kernel('', ('x', 'grid', x.dtype), shape_filter='all')
        fun = Kernel('', ('x', 'grid', x.dtype), shape_filter='skinny')
        fun = Kernel('', ('x', 'grid', x.dtype), shape_filter='square')
        self.assertRaises(TypeError, Kernel, '', ('x', 'grid', x.dtype), \
                          shape_filter1='all')
        self.assertRaises(TypeError, Kernel, '', ('x', 'grid', x.dtype), \
                          shape_filter='blah')

def test_to_and_from_gpu(self):
    """ Make sure we can load and unload data off the gpu. """
    shape = (100, 100, 100)
    d = Data()
    space.initialize_space(shape)
    for dtype in self.valid_dtypes:
        # Create data to load.
        d_cpu = np.random.randn(*shape).astype(dtype)
        if dtype in (np.complex64, np.complex128):
            d_cpu = (1 + 1j) * d_cpu

        # Load and retrieve.
        d.to_gpu(d_cpu)
        self.assertTrue((d_cpu == d.get()).all())

def test_init(self):
    """ Test initialize function. """
    for case in self.cases:
        unfit_array = np.zeros(10)
        # Plain int (formerly np.int) is not a supported dtype.
        untype_array = np.zeros(case['shape']).astype(int)
        space.initialize_space(case['shape'])
        self.assertRaises(TypeError, Grid, int)
        self.assertRaises(TypeError, Grid, unfit_array)
        self.assertRaises(TypeError, Grid, untype_array)
        self.assertRaises(TypeError, Grid, 'string')
        self.assertRaises(TypeError, Grid, np.float32, x_overlap='a')
        self.assertRaises(TypeError, Grid, np.float32, x_overlap=2.2)
        self.assertRaises(TypeError, Grid, np.float32, x_overlap=-2)
        Grid(np.random.randn(*case['shape']).astype(case['dtype']))
        Grid(case['dtype'])
        Grid(np.random.randn(*case['shape']).astype(case['dtype']), x_overlap=1)
        Grid(case['dtype'], x_overlap=2)

def test_kernel_self_opt(self):
    """ Make sure the kernel settles on the fastest configuration. """
    for case in (self.cases[0],):
        space.initialize_space(case['shape'])
        z = Out(case['dtype'])
        fun = Kernel('', ('z', 'out', z.dtype), shape_filter='square')

        # Run through all configurations.
        hist = []
        while fun.exec_configs:
            hist.append(fun(z))

        # Find the fastest config; early-bird wins ties.
        best_time, best_cfg = min(hist, key=lambda x: x[0])

        # Run once more; should use the fastest configuration.
        next_time, next_cfg = fun(z)

        # Make sure we have chosen the fastest configuration.
        self.assertEqual(best_cfg, next_cfg)

def test_sum(self):
    """ Make sure summing works. """
    for case in self.cases:
        space.initialize_space(case['shape'])
        x = Out(case['dtype'], op='sum')
        x_cpu_data = np.random.randn(*case['shape'][1:]).astype(case['dtype'])
        if case['dtype'] in (np.complex64, np.complex128):
            x_cpu_data = (1 + 1j) * x_cpu_data

        x.data.set(x_cpu_data)
        res_gold = comm.allreduce(np.sum(x_cpu_data.flatten()))
        x.reduce()

        err = abs(res_gold - x.get()) / abs(res_gold)
        if case['dtype'] in (np.float32, np.complex64):
            self.assertTrue(err < 1e-3)
        else:
            self.assertTrue(err < 1e-10)

def test_simple_example(self):
    """ Implement a simple kernel. """
    # Form data to work on.
    shape = (100, 100, 100)
    space.initialize_space(shape)
    x = Grid((1 + 1j) * np.ones(shape).astype(np.complex128))
    z = Out(np.float64)

    # Make a kernel.
    code = """
        if (_in_global) { // Need to be in the space.
            z += real(x(0,0,0)) + imag(x(0,0,0));
        }
        """
    fun = Kernel(code, \
                 ('x', 'grid', x.dtype), \
                 ('z', 'out', z.dtype))

    # Execute and check the result.
    fun(x, z)
    gpu_sum = z.get()
    # Every cell contributes real + imag = 2, so the expected sum is known.
    cpu_sum = 2.0 * np.prod(shape)
    self.assertTrue(abs(gpu_sum - cpu_sum) / abs(cpu_sum) < 1e-6)

def test_recover(self):
    """ Make sure we can store and retrieve information from the GPU. """
    for case in self.cases:
        space.initialize_space(case['shape'])
        data = np.random.randn(*case['shape']).astype(case['dtype'])
        cpu_data = np.empty_like(data)
        comm.Allreduce(data, cpu_data)
        g = Grid(cpu_data)
        gpu_data = g.get()
        if comm.Get_rank() == 0:
            self.assertTrue((cpu_data == gpu_data).all())

        # Test with-overlap cases as well.
        for k in range(1, 3):
            g = Grid(cpu_data, x_overlap=k)
            gpu_data = g.get()
            if comm.Get_rank() == 0:
                self.assertTrue((cpu_data == gpu_data).all())
            cpu_raw = get_cpu_raw(cpu_data, k)
            self.assertTrue((cpu_raw == g._get_raw()).all())

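# NOTE: get_cpu_raw is used by test_recover (above) and test_synchronize
# (below) but is not defined in this section. A minimal sketch, assuming the
# raw layout pads the x-axis with k periodic (wrap-around) layers on each
# side; the real helper may differ, especially for multi-node layouts.
def get_cpu_raw(cpu_data, k):
    import numpy as np  # Redundant if the module already imports np.
    # Prepend the last k x-layers and append the first k x-layers.
    return np.concatenate((cpu_data[-k:, :, :], cpu_data, cpu_data[:k, :, :]),
                          axis=0)
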
def test_simple_kernel(self):
    """ Implement a simple kernel. """
    for case in self.cases:
        # Form data to work on.
        space.initialize_space(case['shape'])
        x_np = comm.allreduce(np.random.randn(*case['shape']).\
                                astype(case['dtype']))
        x = Grid(x_np, x_overlap=2)
        s_np = comm.allreduce(np.random.randn(case['shape'][0], 1, 1).\
                                astype(case['dtype']))
        s = Const(s_np)
        z = Out(case['dtype'])

        # Make a kernel.
        code = Template("""
            if (_in_local && _in_global) {
                z += a * s(_X) * x(0,0,0);
            }
            """).render()
        fun = Kernel(code, \
                     ('a', 'number', case['dtype']), \
                     ('x', 'grid', x.dtype), \
                     ('s', 'const', s.dtype), \
                     ('z', 'out', z.dtype), \
                     shape_filter='all')

        # Execute and check the result for every exec config.
        while fun.exec_configs:
            fun(case['dtype'](2.0), x, s, z)
            gpu_sum = z.get()
            cpu_sum = np.sum(2 * s_np * x_np)
            err = abs(gpu_sum - cpu_sum) / abs(cpu_sum)
            if case['dtype'] in (np.float32, np.complex64):
                self.assertTrue(err < 1e-2, (case, err))
            else:
                self.assertTrue(err < 1e-6, (case, err))

def test_synchronize(self):
    """ Make sure that we can make the overlap spaces accurate. """
    for case in self.cases:
        space.initialize_space(case['shape'])
        data = np.random.randn(*case['shape']).astype(case['dtype'])
        cpu_data = np.empty_like(data)
        comm.Allreduce(data, cpu_data)

        g = Grid(case['dtype'])
        self.assertRaises(TypeError, g.synchronize)  # No overlap.

        # Test with-overlap cases as well.
        for k in range(1, 4):
            g = Grid(case['dtype'], x_overlap=k)

            # Overwrite the entire grid.
            data = np.random.randn(*case['shape']).astype(case['dtype'])
            cpu_data = np.empty_like(data)
            comm.Allreduce(data, cpu_data)
            cpu_raw_bad = get_cpu_raw(cpu_data, k)
            cpu_raw_bad[:k, :, :] += 1  # Mess up padding areas.
            cpu_raw_bad[-k:, :, :] += 1
            drv.memcpy_htod(g.data.ptr, cpu_raw_bad)

            # Prove that the data is not synchronized at this time.
            cpu_raw = get_cpu_raw(cpu_data, k)
            gd = g._get_raw()
            self.assertTrue((gd[:k, :, :] != cpu_raw[:k, :, :]).all())
            self.assertTrue((gd[-k:, :, :] != cpu_raw[-k:, :, :]).all())

            g.synchronize()  # Synchronize the overlapping data.

            # Make sure that the overlap data is accurate.
            gd = g._get_raw()
            self.assertTrue((gd[:k, :, :] == cpu_raw[:k, :, :]).all())
            self.assertTrue((gd[-k:, :, :] == cpu_raw[-k:, :, :]).all())

            comm.Barrier()  # Wait for other mpi nodes to finish.

def test_ecc_disabled(self):
    """ Make sure ECC is disabled. """
    space.initialize_space((100, 2, 3))
    self.assertFalse(space.get_space_info()['ecc_enabled'], \
                     'ECC enabled! Should be disabled for best performance.')
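
# Presumably the module closes with the standard unittest entry point
# (an assumption; included for completeness and requiring "import unittest").
if __name__ == '__main__':
    unittest.main()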