def __init__(self, array_or_dtype, x_overlap=0):
    """ Create a spatial grid on the GPU(s).

    Input variables
    array_or_dtype -- can either be a numpy array of the same shape as the
        global space, or a numpy dtype. If a valid array is passed, it
        will be loaded on to the GPU. If a dtype is passed, then an array
        of zeros, of that dtype will be loaded onto the GPU.

    Optional variables
    x_overlap -- the number of adjacent cells in either the negative or
        positive x-direction that need to simultaneously be accessed along
        with the current cell. Must be a non-negative integer. Default
        value is 0.

    Raises
    TypeError -- if x_overlap is not an int or is negative; on rank 0, if
        array_or_dtype is neither a numpy array nor a type, or if the
        array's shape differs from the shape of the space.

    """
    # Look the space description up once and reuse it below.
    space_info = get_space_info()
    shape = space_info['shape']  # Shape of the global space.
    xr = space_info['x_range']  # The local x_range.
    all_x_ranges = space_info['all_x_ranges']  # x_range of every node.

    self._set_gce_type('grid')  # Set the gce type to grid.

    # Make sure overlap option is valid.
    # The exact type test is kept on purpose: isinstance() would also
    # admit bool, which the original check rejects.
    if type(x_overlap) is not int:
        raise TypeError('x_overlap must be an integer.')
    elif x_overlap < 0:
        raise TypeError('x_overlap must be a non-negative integer.')

    # Only the root rank inspects the input; it cuts the global array into
    # one slab per entry of all_x_ranges so scatter can hand them out.
    if comm.rank == 0:
        if type(array_or_dtype) is np.ndarray:  # Input is an array.
            array = array_or_dtype

            # Make sure the array is of the correct shape.
            if array.shape != shape:
                raise TypeError(
                    'Shape of array does not match shape of space.')

            # Make sure the array is of a valid datatype.
            self._get_dtype(array.dtype.type)

        elif type(array_or_dtype) is type:  # Input is a datatype.
            self._get_dtype(array_or_dtype)  # Validate the dtype.
            array = np.zeros(shape, dtype=self.dtype)  # Zeros array.

        else:  # Invalid input.
            raise TypeError('Input variable must be a numpy array or dtype')

        # Prepare array to be scattered.
        slabs = [array[r[0]:r[1], :, :] for r in all_x_ranges]
    else:
        slabs = None

    array = comm.scatter(slabs)
    # Repeat the dtype step on the received slab so that ranks other than
    # the root go through it as well.
    self._get_dtype(array.dtype.type)

    # Add padding to array, if needed.
    # Compare by value: "is not 0" tests object identity, which only works
    # through CPython's small-int caching and raises a SyntaxWarning on
    # Python 3.8+.
    self._xlap = x_overlap
    if self._xlap > 0:
        # Uninitialised halo slabs on both x ends of the local array.
        padding = np.empty((self._xlap,) + shape[1:3], dtype=array.dtype)
        array = np.concatenate((padding, array, padding), axis=0)

    self.to_gpu(array)  # Load onto device.

    # Determine information needed for synchronization.
    if self._xlap > 0:
        local_nx = xr[1] - xr[0]  # Number of local (unpadded) x cells.

        def ptr_dx(x_pos):
            # Calculates the pointer to the x offset in a grid.
            return self.data.ptr + self.data.dtype.itemsize * \
                x_pos * shape[1] * shape[2]

        # Pointers to different sections of the grid that are relevant
        # for synchronization.
        self._sync_ptrs = {
            'forw_src': ptr_dx(local_nx),
            'back_dest': ptr_dx(0),
            'back_src': ptr_dx(self._xlap),
            'forw_dest': ptr_dx(local_nx + self._xlap),
        }

        # Buffers used during synchronization.
        self._sync_buffers = [
            drv.pagelocked_empty((self._xlap, shape[1], shape[2]),
                                 self.dtype)
            for _ in range(4)]

        # Streams used during synchronization.
        self._sync_streams = [drv.Stream() for _ in range(4)]

        # Used to identify neighboring MPI nodes with whom to synchronize.
        self._sync_adj = space_info['mpi_adj']

        # Offset in bytes to the true start of the grid.
        # This is used to "hide" overlap areas from the kernel.
        self._xlap_offset = self.data.dtype.itemsize * \
            self._xlap * shape[1] * shape[2]

        # NOTE(review): the flattened original does not show how the next
        # two calls were nested; they are kept in the overlap branch
        # because they follow the synchronization setup -- confirm.
        self.synchronize()  # Synchronize the grid.
        comm.Barrier()  # Wait for all grids to synchronize.
def __init__(self, array_or_dtype, x_overlap=0):
    """ Create a spatial grid on the GPU(s).

    Input variables
    array_or_dtype -- can either be a numpy array of the same shape as the
        global space, or a numpy dtype. If a valid array is passed, it
        will be loaded on to the GPU. If a dtype is passed, then an array
        of zeros, of that dtype will be loaded onto the GPU.

    Optional variables
    x_overlap -- the number of adjacent cells in either the negative or
        positive x-direction that need to simultaneously be accessed along
        with the current cell. Must be a non-negative integer. Default
        value is 0.

    Raises
    TypeError -- if x_overlap is not an int or is negative; on rank 0, if
        array_or_dtype is neither a numpy array nor a type, or if the
        array's shape differs from the shape of the space.

    """
    # Look the space description up once and reuse it below.
    space_info = get_space_info()
    shape = space_info['shape']  # Shape of the global space.
    xr = space_info['x_range']  # The local x_range.
    all_x_ranges = space_info['all_x_ranges']  # x_range of every node.

    self._set_gce_type('grid')  # Set the gce type to grid.

    # Make sure overlap option is valid.
    # The exact type test is kept on purpose: isinstance() would also
    # admit bool, which the original check rejects.
    if type(x_overlap) is not int:
        raise TypeError('x_overlap must be an integer.')
    elif x_overlap < 0:
        raise TypeError('x_overlap must be a non-negative integer.')

    # Only the root rank inspects the input; it cuts the global array into
    # one slab per entry of all_x_ranges so scatter can hand them out.
    if comm.rank == 0:
        if type(array_or_dtype) is np.ndarray:  # Input is an array.
            array = array_or_dtype

            # Make sure the array is of the correct shape.
            if array.shape != shape:
                raise TypeError(
                    'Shape of array does not match shape of space.')

            # Make sure the array is of a valid datatype.
            self._get_dtype(array.dtype.type)

        elif type(array_or_dtype) is type:  # Input is a datatype.
            self._get_dtype(array_or_dtype)  # Validate the dtype.
            array = np.zeros(shape, dtype=self.dtype)  # Zeros array.

        else:  # Invalid input.
            raise TypeError('Input variable must be a numpy array or dtype')

        # Prepare array to be scattered.
        slabs = [array[r[0]:r[1], :, :] for r in all_x_ranges]
    else:
        slabs = None

    array = comm.scatter(slabs)
    # Repeat the dtype step on the received slab so that ranks other than
    # the root go through it as well.
    self._get_dtype(array.dtype.type)

    # Add padding to array, if needed.
    # Compare by value: "is not 0" tests object identity, which only works
    # through CPython's small-int caching and raises a SyntaxWarning on
    # Python 3.8+.
    self._xlap = x_overlap
    if self._xlap > 0:
        # Uninitialised halo slabs on both x ends of the local array.
        padding = np.empty((self._xlap,) + shape[1:3], dtype=array.dtype)
        array = np.concatenate((padding, array, padding), axis=0)

    self.to_gpu(array)  # Load onto device.

    # Determine information needed for synchronization.
    if self._xlap > 0:
        local_nx = xr[1] - xr[0]  # Number of local (unpadded) x cells.

        def ptr_dx(x_pos):
            # Calculates the pointer to the x offset in a grid.
            return self.data.ptr + self.data.dtype.itemsize * \
                x_pos * shape[1] * shape[2]

        # Pointers to different sections of the grid that are relevant
        # for synchronization.
        self._sync_ptrs = {
            'forw_src': ptr_dx(local_nx),
            'back_dest': ptr_dx(0),
            'back_src': ptr_dx(self._xlap),
            'forw_dest': ptr_dx(local_nx + self._xlap),
        }

        # Buffers used during synchronization.
        self._sync_buffers = [
            drv.pagelocked_empty((self._xlap, shape[1], shape[2]),
                                 self.dtype)
            for _ in range(4)]

        # Streams used during synchronization.
        self._sync_streams = [drv.Stream() for _ in range(4)]

        # Used to identify neighboring MPI nodes with whom to synchronize.
        self._sync_adj = space_info['mpi_adj']

        # Offset in bytes to the true start of the grid.
        # This is used to "hide" overlap areas from the kernel.
        self._xlap_offset = self.data.dtype.itemsize * \
            self._xlap * shape[1] * shape[2]

        # NOTE(review): the flattened original does not show how the next
        # two calls were nested; they are kept in the overlap branch
        # because they follow the synchronization setup -- confirm.
        self.synchronize()  # Synchronize the grid.
        comm.Barrier()  # Wait for all grids to synchronize.
# NOTE(review): the excerpt starts at this assignment; it may be the non-root
# branch of a rank check that begins earlier in the file -- confirm against
# the preceding lines before relying on it.
data = None
data = CW.bcast(data, root=0)  # or CW.Bcast(data, root=0) for numpy arrays
print("On rank {} data = {}".format(rank, data))
CW.Barrier()

# You might have to do some testing of doing a task on one process and
# sending the result to everyone else, though. If you are sending big stuff
# it might take a while.

# Something a little bit different from broadcasting, but which still shares
# information held on one node with the others, is scatter: it hands out the
# elements of a list or array to all the processes. This can be good for
# independent tasks. So let's try scattering the pickle files.
# NOTE(review): mpi4py's scatter expects the root's sequence to hold exactly
# one item per rank -- check that the number of matching files equals the
# number of processes.
pickle_file = glob.glob("test_data_*.pkl") if rank == 0 else None
pickle_file = CW.scatter(pickle_file, root=0)
print("On rank {} pickle_file = {}".format(rank, pickle_file))
CW.Barrier()

# Now that each rank has a file, it can do its own thing.

# If individual processes were doing their own tasks but you need to collate
# everything, you can do that using gather, which puts everything from the
# different ranks in a list:
data = (rank + 1) ** 2
print("On rank {} data = {}".format(rank, data))
CW.Barrier()

data = CW.gather(data, root=0)
print("On rank {} data = {}".format(rank, data))