def __init__(self,
             gpu_ctx,
             H, eta0, hu0, hv0,
             nx, ny,
             dx, dy, dt,
             g, f, r, A=0.0,
             t=0.0,
             coriolis_beta=0.0,
             y_zero_reference_cell = 0,
             wind_stress=WindStress.WindStress(),
             boundary_conditions=Common.BoundaryConditions(),
             write_netcdf=False,
             ignore_ghostcells=False,
             offset_x=0, offset_y=0,
             block_width=16, block_height=16):
    """
    Set up a CTCS simulator: compile the kernels, upload the initial
    state to the device and create the boundary-condition handler.

    gpu_ctx: Context object used to compile/fetch the CUDA kernels
    H: Water depth incl ghost cells, (nx+2)*(ny+2) cells
    eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
    hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
    hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
    nx: Number of cells along x-axis
    ny: Number of cells along y-axis
    dx: Grid cell spacing along x-axis (20 000 m)
    dy: Grid cell spacing along y-axis (20 000 m)
    dt: Size of each timestep (90 s)
    g: Gravitational accelleration (9.81 m/s^2)
    f: Coriolis parameter (1.2e-4 s^1), effectively as f = f + beta*y
    r: Bottom friction coefficient (2.4e-3 m/s)
    A: Eddy viscosity coefficient (O(dx))
    t: Start simulation at time t
    coriolis_beta: Coriolis linear factor -> f = f + beta*(y-y_0)
    y_zero_reference_cell: The cell representing y_0 in the above,
        defined as the lower face of the cell. It is shifted here by one
        ghost cell (and by the southern sponge width when sponge
        boundaries are used) before being handed to the parent.
    wind_stress: Wind stress parameters
    boundary_conditions: Boundary condition object
    write_netcdf: Write the results after each superstep to a netCDF file
    ignore_ghostcells, offset_x, offset_y: Forwarded to the parent
        constructor; the values the parent stores are later passed on to
        the netCDF writer.
    block_width, block_height: Passed to the kernels as compile-time
        defines and forwarded to the parent constructor.
    """

    # This scheme uses one ghost cell on every side, both for the parent
    # bookkeeping and for the device buffers allocated below.
    ghost_cells_x = 1
    ghost_cells_y = 1

    # Account for the southern ghost cell in the Coriolis reference row.
    y_zero_reference_cell = y_zero_reference_cell + 1

    self.boundary_conditions = boundary_conditions
    if boundary_conditions.isSponge():
        # Sponge cells replace the ghost cells: grow the domain by the
        # sponge widths and remove the two ghost cells per direction.
        sponge = boundary_conditions.spongeCells
        nx = nx + sponge[1] + sponge[3] - 2 * ghost_cells_x
        ny = ny + sponge[0] + sponge[2] - 2 * ghost_cells_y
        y_zero_reference_cell = y_zero_reference_cell + sponge[2]

    # The remaining self.<parameters> are set by the parent constructor.
    # theta and rk_order are passed as None by this scheme.
    theta = None
    rk_order = None
    super(CTCS, self).__init__(gpu_ctx,
                               nx, ny,
                               ghost_cells_x,
                               ghost_cells_y,
                               dx, dy, dt,
                               g, f, r, A,
                               t,
                               theta, rk_order,
                               coriolis_beta,
                               y_zero_reference_cell,
                               wind_stress,
                               write_netcdf,
                               ignore_ghostcells,
                               offset_x, offset_y,
                               block_width, block_height)

    # Index range for interior domain (north, east, south, west)
    # so that interior domain of eta is
    # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0],
    #     self.interior_domain_indices[3]:self.interior_domain_indices[1]]
    self.interior_domain_indices = np.array([-1, -1, 1, 1])
    self._set_interior_domain_from_sponge_cells()

    def load_kernel(source_file):
        # Every kernel is compiled with the same block-size defines; a
        # fresh dict is built for each call.
        return gpu_ctx.get_kernel(source_file, defines={
            'block_width': block_width,
            'block_height': block_height,
        })

    # Get kernels
    self.u_kernel = load_kernel("CTCS_U_kernel.cu")
    self.v_kernel = load_kernel("CTCS_V_kernel.cu")
    self.eta_kernel = load_kernel("CTCS_eta_kernel.cu")

    # Get CUDA functions
    self.computeUKernel = self.u_kernel.get_function("computeUKernel")
    self.computeVKernel = self.v_kernel.get_function("computeVKernel")
    self.computeEtaKernel = self.eta_kernel.get_function("computeEtaKernel")

    # Prepare kernel launches
    self.computeUKernel.prepare("iiiifffffffffPiPiPiPiPif")
    self.computeVKernel.prepare("iiiifffffffffPiPiPiPiPif")
    self.computeEtaKernel.prepare("iiffffffffPiPiPi")

    # Set up textures
    self.update_wind_stress(self.u_kernel, self.computeUKernel)
    self.update_wind_stress(self.v_kernel, self.computeVKernel)

    # Create data by uploading to device
    self.H = Common.CUDAArray2D(self.gpu_stream, nx, ny,
                                ghost_cells_x, ghost_cells_y, H)
    self.gpu_data = Common.SWEDataArakawaC(self.gpu_stream, nx, ny,
                                           ghost_cells_x, ghost_cells_y,
                                           eta0, hu0, hv0)

    # Global size needs to be larger than the default from parent.__init__
    cells_x = self.nx + 2 * ghost_cells_x
    cells_y = self.ny + 2 * ghost_cells_y
    self.global_size = (
        int(np.ceil(cells_x / float(self.local_size[0]))),
        int(np.ceil(cells_y / float(self.local_size[1]))),
    )

    self.bc_kernel = CTCS_boundary_condition(gpu_ctx,
                                             self.nx,
                                             self.ny,
                                             self.boundary_conditions,
                                             ghost_cells_x, ghost_cells_y)

    if self.write_netcdf:
        self.sim_writer = SimWriter.SimNetCDFWriter(
            self,
            ignore_ghostcells=self.ignore_ghostcells,
            staggered_grid=True,
            offset_x=self.offset_x,
            offset_y=self.offset_y)
def __init__(self,
             gpu_ctx,
             H, eta0, hu0, hv0,
             nx, ny,
             dx, dy, dt,
             g, f, r,
             t=0.0,
             coriolis_beta=0.0,
             y_zero_reference_cell = 0,
             wind_stress=WindStress.WindStress(),
             boundary_conditions=Common.BoundaryConditions(),
             write_netcdf=False,
             ignore_ghostcells=False,
             offset_x=0, offset_y=0,
             block_width=16, block_height=16):
    """
    Set up an FBL simulator that uses asymmetric ghost cells: compile the
    three kernels, upload the initial state and create the boundary
    handler.

    gpu_ctx: Context object used to compile/fetch the CUDA kernels
    H: Water depth incl ghost cells, (nx+2)*(ny+2) cells
    eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
    hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
    hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
    nx: Number of cells along x-axis
    ny: Number of cells along y-axis
    dx: Grid cell spacing along x-axis (20 000 m)
    dy: Grid cell spacing along y-axis (20 000 m)
    dt: Size of each timestep (90 s)
    g: Gravitational accelleration (9.81 m/s^2)
    f: Coriolis parameter (1.2e-4 s^1), effectively as f = f + beta*y
    r: Bottom friction coefficient (2.4e-3 m/s)
    t: Start time, forwarded to the parent constructor
    coriolis_beta: Coriolis linear factor -> f = f + beta*y
    y_zero_reference_cell: The cell representing y_0 in the above,
        defined as the lower face of the cell. Shifted by the southern
        sponge width when sponge boundaries are used.
    wind_stress: Wind stress parameters
    boundary_conditions: Boundary condition object
    write_netcdf: Write the results after each superstep to a netCDF file
    ignore_ghostcells, offset_x, offset_y: Forwarded to the parent
        constructor; the values the parent stores are later passed on to
        the netCDF writer.
    block_width, block_height: Passed to the kernels as compile-time
        defines and forwarded to the parent constructor.
    """

    # No symmetric ghost cells in this variant; extra cells are tracked
    # per side in asym_ghost_cells instead.
    ghost_cells_x = 0
    ghost_cells_y = 0
    self.asym_ghost_cells = [0, 0, 0, 0]  # [N, E, S, W]

    # Index range for interior domain (north, east, south, west)
    # so that interior domain of eta is
    # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0],
    #     self.interior_domain_indices[3]:self.interior_domain_indices[1]]
    self.interior_domain_indices = np.array([None, None, 0, 0])

    self.boundary_conditions = boundary_conditions
    bc = self.boundary_conditions

    # Add asym ghost cell if periodic boundary condition (value 2):
    # one extra cell on the north side for north/south ...
    periodic_north_south = (bc.north == 2) or (bc.south == 2)
    if periodic_north_south:
        self.asym_ghost_cells[0] = 1
        self.interior_domain_indices[0] = -1
    # ... and one on the east side for east/west.
    periodic_east_west = (bc.east == 2) or (bc.west == 2)
    if periodic_east_west:
        self.asym_ghost_cells[1] = 1
        self.interior_domain_indices[1] = -1

    if boundary_conditions.isSponge():
        # Grow the domain by the sponge widths in each direction.
        sponge = boundary_conditions.spongeCells
        nx = nx + sponge[1] + sponge[3]
        ny = ny + sponge[0] + sponge[2]
        y_zero_reference_cell = y_zero_reference_cell + sponge[2]

    # This scheme passes None for theta, rk_order and A.
    theta = None
    rk_order = None
    A = None
    super(FBL, self).__init__(gpu_ctx,
                              nx, ny,
                              ghost_cells_x,
                              ghost_cells_y,
                              dx, dy, dt,
                              g, f, r, A,
                              t,
                              theta, rk_order,
                              coriolis_beta,
                              y_zero_reference_cell,
                              wind_stress,
                              write_netcdf,
                              ignore_ghostcells,
                              offset_x, offset_y,
                              block_width, block_height)
    self._set_interior_domain_from_sponge_cells()

    # Get kernels (each call receives its own defines dict)
    self.u_kernel = gpu_ctx.get_kernel(
        "FBL_U_kernel.cu",
        defines={'block_width': block_width, 'block_height': block_height})
    self.v_kernel = gpu_ctx.get_kernel(
        "FBL_V_kernel.cu",
        defines={'block_width': block_width, 'block_height': block_height})
    self.eta_kernel = gpu_ctx.get_kernel(
        "FBL_eta_kernel.cu",
        defines={'block_width': block_width, 'block_height': block_height})

    # Get CUDA functions
    self.computeUKernel = self.u_kernel.get_function("computeUKernel")
    self.computeVKernel = self.v_kernel.get_function("computeVKernel")
    self.computeEtaKernel = self.eta_kernel.get_function("computeEtaKernel")

    # Prepare kernel launches
    self.computeUKernel.prepare("iiffffffffPiPiPiPif")
    self.computeVKernel.prepare("iiffffffffPiPiPiPif")
    self.computeEtaKernel.prepare("iiffffffffPiPiPiPi")

    # Set up textures
    self.update_wind_stress(self.u_kernel, self.computeUKernel)
    self.update_wind_stress(self.v_kernel, self.computeVKernel)

    # Create data by uploading to device
    self.H = Common.CUDAArray2D(self.gpu_stream, nx, ny,
                                ghost_cells_x, ghost_cells_y,
                                H, self.asym_ghost_cells)
    self.gpu_data = Common.SWEDataArakawaC(self.gpu_stream, nx, ny,
                                           ghost_cells_x, ghost_cells_y,
                                           eta0, hu0, hv0,
                                           self.asym_ghost_cells)

    # Overwrite halo with asymmetric ghost cells
    extra_x = self.asym_ghost_cells[1] + self.asym_ghost_cells[3]
    extra_y = self.asym_ghost_cells[0] + self.asym_ghost_cells[2]
    self.nx_halo = np.int32(nx + extra_x)
    self.ny_halo = np.int32(ny + extra_y)

    self.bc_kernel = FBL_periodic_boundary(self.gpu_ctx,
                                           self.nx,
                                           self.ny,
                                           self.boundary_conditions,
                                           self.asym_ghost_cells)

    self.totalNumIterations = 0

    if self.write_netcdf:
        self.sim_writer = SimWriter.SimNetCDFWriter(
            self,
            ignore_ghostcells=self.ignore_ghostcells,
            staggered_grid=True,
            offset_x=self.offset_x,
            offset_y=self.offset_y)
def __init__(self,
             gpu_ctx,
             H, eta0, hu0, hv0,
             nx, ny,
             dx, dy, dt,
             g, f, r,
             t=0.0,
             coriolis_beta=0.0,
             y_zero_reference_cell = 1,
             wind_stress=WindStress.WindStress(),
             boundary_conditions=Common.BoundaryConditions(),
             write_netcdf=False,
             comm=None,
             ignore_ghostcells=False,
             offset_x=0, offset_y=0,
             block_width=16, block_height=16):
    """
    Set up an FBL simulator driven by a single step kernel: compile the
    kernel, upload the initial state and create the boundary handler.

    gpu_ctx: Context object used to compile/fetch the CUDA kernel
    H: Water depth incl ghost cells, (nx+2)*(ny+2) cells
    eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
    hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
    hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+3) cells
    nx: Number of cells along x-axis
    ny: Number of cells along y-axis
    dx: Grid cell spacing along x-axis (20 000 m)
    dy: Grid cell spacing along y-axis (20 000 m)
    dt: Size of each timestep (90 s)
    g: Gravitational accelleration (9.81 m/s^2)
    f: Coriolis parameter (1.2e-4 s^1), effectively as f = f + beta*y
    r: Bottom friction coefficient (2.4e-3 m/s)
    t: Start time, forwarded to the parent constructor
    coriolis_beta: Coriolis linear factor -> f = f + beta*y
    y_zero_reference_cell: The cell representing y_0 in the above,
        defined as the lower face of the cell. Shifted by the southern
        sponge width when sponge boundaries are used.
    wind_stress: Wind stress parameters
    boundary_conditions: Boundary condition object
    write_netcdf: Write the results after each superstep to a netCDF file
    comm: MPI communicator
    ignore_ghostcells, offset_x, offset_y: Forwarded to the parent
        constructor; the values the parent stores are later passed on to
        the netCDF writer.
    block_width, block_height: Passed to the kernel as compile-time
        defines and forwarded to the parent constructor.

    Legacy input shapes without ghost cells are padded (see below); that
    path is only valid for benchmarking.
    """

    #### This makes it possible to give the old input shapes to the new
    #### ghost cell regime: Only valid for benchmarking!
    # The input is placed in the lower-left corner of a larger float32
    # buffer. Slice assignment copies the values, so no extra .copy() is
    # required.
    if eta0.shape == (ny, nx):
        padded_eta = np.zeros((ny + 2, nx + 2), dtype=np.float32)
        padded_eta[:ny, :nx] = eta0
        eta0 = padded_eta
    if H.shape == (ny, nx):
        # np.full pins the dtype to float32. Multiplying a float32 array by
        # the NumPy scalar np.max(H) can instead promote to float64 on
        # newer NumPy versions when H is not float32.
        padded_H = np.full((ny + 2, nx + 2), np.max(H), dtype=np.float32)
        padded_H[:ny, :nx] = H
        H = padded_H
    if hu0.shape == (ny, nx + 1):
        padded_hu = np.zeros((ny + 2, nx + 1), dtype=np.float32)
        padded_hu[:ny, :nx + 1] = hu0
        hu0 = padded_hu
    if hv0.shape == (ny + 1, nx):
        padded_hv = np.zeros((ny + 3, nx + 2), dtype=np.float32)
        padded_hv[:ny + 1, :nx] = hv0
        hv0 = padded_hv

    # One ghost cell on every side.
    ghost_cells_x = 1
    ghost_cells_y = 1

    # Index range for interior domain (north, east, south, west)
    # so that interior domain of eta is
    # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0],
    #     self.interior_domain_indices[3]:self.interior_domain_indices[1]]
    self.interior_domain_indices = np.array([-1, -1, 1, 1])

    self.boundary_conditions = boundary_conditions
    if boundary_conditions.isSponge():
        # Sponge cells replace the two ghost cells in each direction.
        sponge = boundary_conditions.spongeCells
        nx = nx - 2 + sponge[1] + sponge[3]
        ny = ny - 2 + sponge[0] + sponge[2]
        y_zero_reference_cell = y_zero_reference_cell + sponge[2]

    # This scheme passes None for theta, rk_order and A.
    rk_order = None
    theta = None
    A = None
    super(FBL, self).__init__(gpu_ctx,
                              nx, ny,
                              ghost_cells_x,
                              ghost_cells_y,
                              dx, dy, dt,
                              g, f, r, A,
                              t,
                              theta, rk_order,
                              coriolis_beta,
                              y_zero_reference_cell,
                              wind_stress,
                              write_netcdf,
                              ignore_ghostcells,
                              offset_x, offset_y,
                              comm,
                              block_width, block_height)
    self._set_interior_domain_from_sponge_cells()

    # Get kernels
    self.step_kernel = gpu_ctx.get_kernel(
        "FBL_step_kernel.cu",
        defines={'block_width': block_width, 'block_height': block_height},
        compile_args={
            'no_extern_c': True,
            'options': ["--use_fast_math"],
            # Alternatives kept for profiling/tuning:
            #'options': ["--generate-line-info"],
            #'options': ["--maxrregcount=32"]
            #'arch': "compute_50",
            #'code': "sm_50"
        },
        jit_compile_args={
            #jit_options=[(cuda.jit_option.MAX_REGISTERS, 39)]
        })

    # Get CUDA functions
    self.fblStepKernel = self.step_kernel.get_function("fblStepKernel")

    # Prepare kernel launches
    self.fblStepKernel.prepare("iiffffffffPiPiPiPiif")

    # Set up textures
    self.update_wind_stress(self.step_kernel, self.fblStepKernel)

    # Create data by uploading to device
    self.H = Common.CUDAArray2D(self.gpu_stream, nx, ny,
                                ghost_cells_x, ghost_cells_y, H)
    self.gpu_data = Common.SWEDataArakawaC(self.gpu_stream, nx, ny,
                                           ghost_cells_x, ghost_cells_y,
                                           eta0, hu0, hv0, fbl=True)

    # Domain including ghost cells
    self.nx_halo = np.int32(nx + 2)
    self.ny_halo = np.int32(ny + 2)

    self.bc_kernel = FBL_boundary_conditions(self.gpu_ctx,
                                             self.nx,
                                             self.ny,
                                             self.boundary_conditions)

    # Bit-wise boolean for wall boundary conditions (value 1), one bit per
    # side. Accumulate in a plain Python int and convert once, so the
    # stored flag is always an np.int32 regardless of how NumPy promotes
    # "np.int32 | int".
    wall_bits = 0
    if self.boundary_conditions.north == 1:
        wall_bits |= 0x01
    if self.boundary_conditions.east == 1:
        wall_bits |= 0x02
    if self.boundary_conditions.south == 1:
        wall_bits |= 0x04
    if self.boundary_conditions.west == 1:
        wall_bits |= 0x08
    self.wall_bc = np.int32(wall_bits)

    if self.write_netcdf:
        self.sim_writer = SimWriter.SimNetCDFWriter(
            self,
            ignore_ghostcells=self.ignore_ghostcells,
            staggered_grid=True,
            offset_x=self.offset_x,
            offset_y=self.offset_y)