Exemple #1
0
 def test_three_elements(self):
     """ Only a three-element shape may be used to initialize the space. """
     # Shapes with fewer or more than three entries must raise TypeError.
     wrong_length_shapes = ((100, ), (100, 2), (100, 2, 3, 4), (100, 2, 3, 4, 5))
     for bad_shape in wrong_length_shapes:
         self.assertRaises(TypeError, space.initialize_space, bad_shape)

     # A three-element shape is accepted without raising.
     space.initialize_space((100, 2, 3))
 def test_grid_shape(self):
     """ Make sure the grid shapes of the exec configs are correct.

     For each case and each execution configuration, axes 0 and 1 of the
     grid/block shapes are checked against ``shape[1]`` and ``shape[2]``:
     ``grid * block`` must reach the extent, while ``(grid - 1) * block``
     must fall short of it. The same bounds are then checked for a padded
     kernel, with the block shape reduced by a per-axis pad.
     """
     def check_configs(kernel, shape, pad=(0, 0)):
         """ Assert the coverage bounds for every config of ``kernel``. """
         for cfg in kernel.exec_configs:
             for ind in range(2):
                 n_blocks = cfg['grid_shape'][ind]
                 usable = cfg['block_shape'][ind] - pad[ind]
                 # Dedicated comparison asserts report both operands on failure.
                 self.assertGreaterEqual(n_blocks * usable, shape[ind + 1])
                 self.assertLess((n_blocks - 1) * usable, shape[ind + 1])

     for case in self.cases:
         space.initialize_space(case['shape'])
         z = Out(case['dtype'])
         check_configs(Kernel('', ('z', 'out', z.dtype)), case['shape'])

         # One padded case. NOTE(review): pad looks like the pairwise sums of
         # the padding tuple (1+2, 3+4) -- confirm against Kernel.
         padded = Kernel('', ('z', 'out', z.dtype), padding=(1, 2, 3, 4))
         check_configs(padded, case['shape'], pad=(3, 7))
Exemple #3
0
 def test_three_elements(self):
     """ Space creation must reject every shape that is not a 3-tuple. """
     rejected = [(100,), (100, 2), (100, 2, 3, 4), (100, 2, 3, 4, 5)]
     for shape in rejected:
         # Too few or too many entries: TypeError expected.
         self.assertRaises(TypeError, space.initialize_space, shape)

     # The three-element form must go through without raising.
     space.initialize_space((100, 2, 3))
    def test_padded_kernel(self):
        """ Run a kernel built with padding and compare against numpy. """
        single_precision = (np.float32, np.complex64)
        for case in self.cases:
            dtype = case['dtype']

            # Set up the space and the inputs the kernel operates on.
            space.initialize_space(case['shape'])
            x_np = comm.allreduce(np.random.randn(*case['shape']).astype(dtype))
            x = Grid(x_np, x_overlap=1)
            s_np = comm.allreduce(np.random.randn(1).astype(dtype))
            s = Const(s_np)
            z = Out(dtype)

            # Kernel source: scales x by s, then accumulates a * x into z.
            source = Template("""
                            if (_in_local && _in_global) {
                                x(0,0,0) = s(0) * x(0,0,0);
                                z += a * x(0,0,0);
                            }
                            """).render()
            arg_specs = (('a', 'number', dtype),
                         ('x', 'grid', x.dtype),
                         ('s', 'const', s.dtype, s.data.size),
                         ('z', 'out', z.dtype))
            kernel = Kernel(source, *arg_specs, padding=(1,1,1,1))

            # Run it and compare the output with the CPU reference value.
            kernel(dtype(2), x, s, z)
            gpu_sum = z.get()
            cpu_sum = np.sum(2.0 * s_np * x_np)
            err = abs(gpu_sum - cpu_sum) / abs(cpu_sum)

            # Single-precision types get the looser relative tolerance.
            tol = 1e-2 if dtype in single_precision else 1e-6
            self.assertTrue(err < tol, (case, err))
Exemple #5
0
    def test_batch_sum(self):
        """ Check batch_reduce against per-output numpy sums. """
        num_outs = 3
        complex_types = (np.complex64, np.complex128)
        single_precision = (np.float32, np.complex64)
        for case in self.cases:
            dtype = case['dtype']
            space.initialize_space(case['shape'])
            outs = [Out(dtype, op='sum') for _ in range(num_outs)]
            host_data = [np.random.randn(*case['shape'][1:]).astype(dtype)
                         for _ in range(num_outs)]

            # Give complex cases a non-trivial imaginary part.
            if dtype in complex_types:
                host_data = [(1 + 1j) * arr for arr in host_data]

            # Load each output and record the reference sum for it.
            res_gold = []
            for out, arr in zip(outs, host_data):
                out.data.set(arr)
                res_gold.append(comm.allreduce(np.sum(arr.flatten())))

            batch_reduce(*outs)
            res_gpu = [out.get() for out in outs]

            # Relative error bound depends on the precision of the dtype.
            tol = 1e-3 if dtype in single_precision else 1e-10
            for gold, gpu in zip(res_gold, res_gpu):
                err = abs(gold - gpu) / abs(gold)
                self.assertTrue(err < tol)
Exemple #6
0
 def test_init(self):
     """ Test initialize function.

     For every case, Const must raise TypeError when given a bare type, an
     integer-typed array, or a string, and must accept a length-10 array of
     the case's dtype.
     """
     for case in self.cases:
         # Builtin ``int`` is used instead of ``np.int``: that name was only
         # an alias of ``int`` and is no longer available in newer NumPy.
         untype_array = np.zeros(case['shape']).astype(int)
         space.initialize_space(case['shape'])
         self.assertRaises(TypeError, Const, int)
         self.assertRaises(TypeError, Const, untype_array)
         self.assertRaises(TypeError, Const, 'string')
         Const(np.random.randn(10).astype(case['dtype']))
Exemple #7
0
    def test_get_info(self):
        """ get_space_info should report the shape the space was created with. """
        # NOTE: calling get_space_info before any space has been initialized
        # is not exercised here (that check is disabled).
        expected_shape = (100, 2, 3)
        space.initialize_space(expected_shape)
        self.assertEqual(space.get_space_info()['shape'], expected_shape)
Exemple #8
0
    def test_get_info(self):
        """ The 'shape' entry of the space info must match the initialized shape. """
        # The "uninitialized space raises TypeError" check is currently disabled.
        requested = (100,2,3)
        space.initialize_space(requested)
        reported = space.get_space_info()['shape']
        self.assertEqual(reported, requested)
Exemple #9
0
 def test_partition(self):
     """ The gathered x_ranges must start at 0 and tile the x-extent without gaps. """
     for shape in ((200, 30, 10), (33, 10, 10), (130, 5, 5), (111, 2, 2)):
         space.initialize_space(shape)
         ranges = comm.gather(space.get_space_info()['x_range'])
         if comm.Get_rank() != 0:
             continue  # The checks below run on rank 0 only.

         # First range starts at zero, last one ends at the full x-extent.
         self.assertEqual(ranges[0][0], 0)
         self.assertEqual(ranges[-1][-1], space.get_space_info()['shape'][0])

         # Each range must end exactly where its successor begins.
         for left, right in zip(ranges[:-1], ranges[1:]):
             self.assertEqual(left[1], right[0])
Exemple #10
0
 def test_init(self):
     """ Test initialize function.

     For every case, Out must raise TypeError when given a bare integer
     type, an integer-typed array, a string, or an unknown ``op``, and must
     accept the case's dtype both with the default op and with op='sum'.
     """
     for case in self.cases:
         # Builtin ``int`` is used instead of ``np.int``: that name was only
         # an alias of ``int`` and is no longer available in newer NumPy.
         untype_array = np.zeros(case['shape']).astype(int)
         space.initialize_space(case['shape'])
         self.assertRaises(TypeError, Out, int)
         self.assertRaises(TypeError, Out, untype_array)
         self.assertRaises(TypeError, Out, 'string')
         self.assertRaises(TypeError, Out, np.complex128, op='bad')
         Out(case['dtype'])
         Out(case['dtype'], op='sum')
Exemple #11
0
 def test_partition(self):
     """ Check that the per-rank x_ranges cover the whole x-extent contiguously. """
     shapes = [(200,30,10), (33,10,10), (130,5,5), (111,2,2)]
     for shape in shapes:
         space.initialize_space(shape)
         gathered = comm.gather(space.get_space_info()['x_range'])
         if comm.Get_rank() != 0:
             continue  # Assertions are made on rank 0 only.

         x_extent = space.get_space_info()['shape'][0]
         self.assertEqual(gathered[0][0], 0)
         self.assertEqual(gathered[-1][-1], x_extent)

         # Neighbouring ranges must share their boundary.
         for prev_range, next_range in zip(gathered[:-1], gathered[1:]):
             self.assertEqual(prev_range[1], next_range[0])
Exemple #12
0
 def test_init(self):
     """ Kernel construction: accepted shape filters work, bad keywords fail. """
     for case in self.cases:
         # A grid is created to supply the dtype for the argument spec.
         space.initialize_space(case['shape'])
         x = Grid(np.random.randn(*case['shape']).astype(case['dtype']))
         grid_arg = ('x', 'grid', x.dtype)

         # Default construction, then each accepted shape_filter value.
         Kernel('', grid_arg)
         for accepted in ('all', 'skinny', 'square'):
             Kernel('', grid_arg, shape_filter=accepted)

         # An unknown keyword and an unknown filter value both raise TypeError.
         self.assertRaises(TypeError, Kernel, '', grid_arg,
                           shape_filter1='all')
         self.assertRaises(TypeError, Kernel, '', grid_arg,
                           shape_filter='blah')
Exemple #13
0
    def test_to_and_from_gpu(self):
        """ Round-trip host arrays of every valid dtype through a Data object. """
        shape = (100,100,100)
        d = Data()
        space.initialize_space(shape)
        complex_types = (np.complex64, np.complex128)

        for dtype in self.valid_dtypes:
            # Build the host array; complex dtypes get an imaginary part too.
            host = np.random.randn(*shape).astype(dtype)
            if dtype in complex_types:
                host = (1 + 1j) * host

            # Upload, read back, and require exact element-wise equality.
            d.to_gpu(host)
            fetched = d.get()
            self.assertTrue((host == fetched).all())
Exemple #14
0
 def test_init(self):
     """ Test initialize function.

     For every case, Grid must raise TypeError for a bare integer type, an
     array whose shape does not match the space, an integer-typed array, a
     string, and for x_overlap values that are a string, a float, or
     negative. It must accept either an array or a dtype, with or without
     an x_overlap.
     """
     for case in self.cases:
         unfit_array = np.zeros(10)
         # Builtin ``int`` is used instead of ``np.int``: that name was only
         # an alias of ``int`` and is no longer available in newer NumPy.
         untype_array = np.zeros(case['shape']).astype(int)
         space.initialize_space(case['shape'])

         # Invalid first arguments.
         self.assertRaises(TypeError, Grid, int)
         self.assertRaises(TypeError, Grid, unfit_array)
         self.assertRaises(TypeError, Grid, untype_array)
         self.assertRaises(TypeError, Grid, 'string')

         # Invalid x_overlap values.
         self.assertRaises(TypeError, Grid, np.float32, x_overlap='a')
         self.assertRaises(TypeError, Grid, np.float32, x_overlap=2.2)
         self.assertRaises(TypeError, Grid, np.float32, x_overlap=-2)

         # Valid constructions: from data or from a dtype alone.
         Grid(np.random.randn(*case['shape']).astype(case['dtype']))
         Grid(case['dtype'])
         Grid(np.random.randn(*case['shape']).astype(case['dtype']), x_overlap=1)
         Grid(case['dtype'], x_overlap=2)
Exemple #15
0
    def test_kernel_self_opt(self):
        """ After trying every config, the kernel should reuse the fastest one. """
        case = self.cases[0]  # Only the first case is exercised.
        space.initialize_space(case['shape'])
        z = Out(case['dtype'])
        fun = Kernel('', ('z', 'out', z.dtype), shape_filter='square')

        # Keep calling until no exec configs remain; each call's result is
        # treated as a (time, config) pair.
        timings = []
        while fun.exec_configs:
            timings.append(fun(z))

        # min() returns the first minimal entry, so the earliest run wins ties.
        _, best_cfg = min(timings, key=lambda entry: entry[0])

        # One further call is expected to use the fastest configuration.
        _, next_cfg = fun(z)
        self.assertEqual(best_cfg, next_cfg)
Exemple #16
0
    def test_sum(self):
        """ Reduce a single 'sum' output and compare with the numpy total. """
        for case in self.cases:
            dtype = case['dtype']
            space.initialize_space(case['shape'])
            out = Out(dtype, op='sum')

            # Host data; complex dtypes get an imaginary part as well.
            host = np.random.randn(*case['shape'][1:]).astype(dtype)
            if dtype in (np.complex64, np.complex128):
                host = (1 + 1j) * host

            out.data.set(host)
            res_gold = comm.allreduce(np.sum(host.flatten()))

            out.reduce()
            err = abs(res_gold - out.get()) / abs(res_gold)

            # Looser bound for the single-precision dtypes.
            tol = 1e-3 if dtype in (np.float32, np.complex64) else 1e-10
            self.assertTrue(err < tol)
Exemple #17
0
    def test_simple_example(self):
        """ Build and run a minimal kernel that accumulates into an Out. """
        shape = (100, 100, 100)
        space.initialize_space(shape)

        # Inputs: a complex grid filled with 1+1j and a float64 output.
        x = Grid((1 + 1j) * np.ones(shape).astype(np.complex128))
        z = Out(np.float64)

        # Kernel source, passed to Kernel verbatim.
        source = """  
                if (_in_global) { // Need to be in the space.
                    z += real(x(0,0,0)) + imag(x(0,0,0));
                } """
        kernel = Kernel(source, ('x', 'grid', x.dtype), ('z', 'out', z.dtype))

        # Run the kernel and read the output back.
        kernel(x, z)
        # NOTE(review): the value is fetched but never asserted on here.
        gpu_sum = z.get()
    def test_simple_example(self):
        """ Construct and launch a small kernel over a complex grid. """
        # Space and operands: grid of (1+1j) values, float64 output.
        shape = (100,100,100)
        space.initialize_space(shape)
        ones_complex = np.ones(shape).astype(np.complex128)
        x = Grid((1 + 1j) * ones_complex)
        z = Out(np.float64)

        # Source text handed to Kernel unchanged.
        kernel_src = """  
                if (_in_global) { // Need to be in the space.
                    z += real(x(0,0,0)) + imag(x(0,0,0));
                } """
        x_spec = ('x', 'grid', x.dtype)
        z_spec = ('z', 'out', z.dtype)
        fun = Kernel(kernel_src, x_spec, z_spec)

        # Execute and fetch the result.
        fun(x, z)
        # NOTE(review): no assertion follows; the result is only retrieved.
        gpu_sum = z.get()
Exemple #19
0
    def test_recover(self):
        """ Data stored in a Grid should come back unchanged via get(). """
        for case in self.cases:
            space.initialize_space(case['shape'])
            data = np.random.randn(*case['shape']).astype(case['dtype'])
            cpu_data = np.empty_like(data)
            comm.Allreduce(data, cpu_data)

            def assert_roundtrip(grid):
                """ Fetch on every rank; compare with cpu_data on rank 0 only. """
                fetched = grid.get()
                if comm.Get_rank() == 0:
                    self.assertTrue((cpu_data == fetched).all())

            # Grid built without an explicit overlap.
            assert_roundtrip(Grid(cpu_data))

            # Grids built with x_overlap of 1 and 2; the raw contents are
            # also compared with what get_cpu_raw produces.
            for overlap in range(1, 3):
                g = Grid(cpu_data, x_overlap=overlap)
                assert_roundtrip(g)

                expected_raw = get_cpu_raw(cpu_data, overlap)
                self.assertTrue((expected_raw == g._get_raw()).all())
Exemple #20
0
    def test_simple_kernel(self):
        """ Run a kernel using number, grid, const and out arguments. """
        single_precision = (np.float32, np.complex64)
        for case in self.cases:
            dtype = case['dtype']

            # Set up the space and the operands.
            space.initialize_space(case['shape'])
            x_np = comm.allreduce(np.random.randn(*case['shape']).astype(dtype))
            x = Grid(x_np, x_overlap=2)
            s_np = comm.allreduce(np.random.randn(case['shape'][0],1,1).astype(dtype))
            s = Const(s_np)
            z = Out(dtype)

            # Kernel source: accumulates a * s(_X) * x into z.
            source = Template("""
                            if (_in_local && _in_global) {
                                z += a * s(_X) * x(0,0,0);
                                // z += a * x(0,0,0);
                            }
                            """).render()
            arg_specs = (('a', 'number', dtype),
                         ('x', 'grid', x.dtype),
                         ('s', 'const', s.dtype),
                         ('z', 'out', z.dtype))
            kernel = Kernel(source, *arg_specs, shape_filter='all')

            # Keep running while exec configs remain, checking every result.
            tol = 1e-2 if dtype in single_precision else 1e-6
            while kernel.exec_configs:
                kernel(dtype(2.0), x, s, z)
                gpu_sum = z.get()
                cpu_sum = np.sum(2 * s_np * x_np)
                err = abs(gpu_sum - cpu_sum) / abs(cpu_sum)
                self.assertTrue(err < tol, (case, err))
Exemple #21
0
    def test_synchronize(self):
        """ Make sure that we can make the overlap spaces accurate.

        For every case, a Grid without overlap must raise TypeError on
        synchronize(). For x_overlap of 1, 2 and 3, the raw grid contents are
        overwritten with data whose first and last k layers along the first
        axis are deliberately off by one; after synchronize() those layers
        must match the reference produced by get_cpu_raw.
        """
        for case in self.cases:
            space.initialize_space(case['shape'])
            # NOTE(review): this first data/cpu_data pair is not read before
            # being reassigned in the loop below.
            data = np.random.randn(*case['shape']).astype(case['dtype'])
            cpu_data = np.empty_like(data)
            comm.Allreduce(data, cpu_data)
            g = Grid(case['dtype'])
            self.assertRaises(TypeError, g.synchronize) # No overlap.
            # Test with-overlap cases as well.
            for k in range(1, 4):
                g = Grid(case['dtype'], x_overlap=k)

                # Overwrite entire grid
                data = np.random.randn(*case['shape']).astype(case['dtype'])
                cpu_data = np.empty_like(data)
                comm.Allreduce(data, cpu_data)
                # get_cpu_raw is defined elsewhere; presumably it returns the
                # host-side raw layout including the k overlap layers.
                cpu_raw_bad = get_cpu_raw(cpu_data, k)
                cpu_raw_bad[:k,:,:] += 1 # Mess up padding areas.
                cpu_raw_bad[-k:,:,:] += 1
                # Write the corrupted raw array straight to the grid's buffer.
                drv.memcpy_htod(g.data.ptr, cpu_raw_bad)

                # Prove that the data is not synchronized at this time.
                cpu_raw = get_cpu_raw(cpu_data, k)
                xx = case['shape'][0]  # NOTE(review): unused below.
                gd = g._get_raw()
                self.assertTrue((gd[:k,:,:] != cpu_raw[:k,:,:]).all())
                self.assertTrue((gd[-k:,:,:] != cpu_raw[-k:,:,:]).all())

                g.synchronize() # Synchronize the overlapping data.

                # Make sure that the overlap data is accurate.
                gd = g._get_raw()
                self.assertTrue((gd[:k,:,:] == cpu_raw[:k,:,:]).all())
                self.assertTrue((gd[-k:,:,:] == cpu_raw[-k:,:,:]).all())

                comm.Barrier() # Wait for other mpi nodes to finish.
Exemple #22
0
 def test_ecc_disabled(self):
     """ The space info should report ECC as disabled. """
     space.initialize_space((100, 2, 3))
     ecc_enabled = space.get_space_info()['ecc_enabled']
     failure_msg = 'ECC enabled! Should be disabled for best performance.'
     self.assertTrue(ecc_enabled == False, failure_msg)
Exemple #23
0
 def test_ecc_disabled(self):
     """ Fail if the 'ecc_enabled' entry of the space info is not False. """
     space.initialize_space((100, 2, 3))
     info = space.get_space_info()
     is_disabled = info['ecc_enabled'] == False
     self.assertTrue(
         is_disabled,
         'ECC enabled! Should be disabled for best performance.')