def fromfilename(cls, gpu_ctx, filename, cont_write_netcdf=True):
    """
    Initialize and hotstart a simulation from a netCDF file.

    gpu_ctx: GPU context, forwarded unchanged to the simulator constructor
    filename: Continue simulation based on parameters and last timestep in this file
    cont_write_netcdf: Continue to write the results after each superstep to a new netCDF file

    Returns a new instance of cls, initialized with the bathymetry and the
    last stored timestep (eta, hu, hv and simulation time) of the file.

    Only the parameters that appear in the constructor call below are
    restored. Other attributes stored in the file (coriolis_beta,
    minmod_theta, time_integrator, y_zero_reference_cell) are not forwarded.

    Raises AssertionError if the 'simulator_short' attribute of the file
    does not match cls.__name__.
    """
    # open nc-file
    sim_reader = SimReader.SimNetCDFReader(filename, ignore_ghostcells=False)
    sim_name = str(sim_reader.get('simulator_short'))
    assert sim_name == cls.__name__, \
        "Trying to initialize a " + \
        cls.__name__ + " simulator with netCDF file based on " \
        + sim_name + " results."

    # read parameters
    nx = sim_reader.get("nx")
    ny = sim_reader.get("ny")
    dx = sim_reader.get("dx")
    dy = sim_reader.get("dy")
    dt = sim_reader.get("dt")
    g = sim_reader.get("g")
    r = sim_reader.get("bottom_friction_r")
    f = sim_reader.get("coriolis_force")

    # Wind stress: best effort. If the type cannot be read or is not a valid
    # type, fall back to the default WindStress object. Only Exception is
    # caught so that KeyboardInterrupt/SystemExit still propagate.
    try:
        wind_stress_type = sim_reader.get("wind_stress_type")
        wind = Common.WindStressParams(type=wind_stress_type)
    except Exception:
        wind = WindStress.WindStress()

    # Boundary conditions: read the four entries once and forward them in
    # the order they are stored.
    bc = sim_reader.getBC()
    boundaryConditions = Common.BoundaryConditions(
        bc[0], bc[1], bc[2], bc[3],
        sim_reader.getBCSpongeCells())

    h0 = sim_reader.getH()

    # get last timestep (including simulation time of last timestep)
    eta0, hu0, hv0, time0 = sim_reader.getLastTimeStep()

    return cls(gpu_ctx,
               h0, eta0, hu0, hv0,
               nx, ny,
               dx, dy, dt,
               g, f, r,
               t=time0,
               wind_stress=wind,
               boundary_conditions=boundaryConditions,
               write_netcdf=cont_write_netcdf)
def WindStressParams(type=99,  # "no wind"
                     tau0=0, rho=0, alpha=0, xm=0, Rc=0,
                     x0=0, y0=0,
                     u0=0, v0=0,
                     wind_speed=0, wind_direction=0):
    """
    Backward compatibility factory that maps an old integer wind stress
    type onto the corresponding WindStress object.

    SHOULD NOT BE USED IN NEW CODE! Make the WindStress object directly instead.

    type: 0 -> UniformAlongShoreWindStress(tau0, rho, alpha)
          1 -> BellShapedAlongShoreWindStress(xm, tau0, rho, alpha)
          2 -> MovingCycloneWindStress(Rc, x0, y0, u0, v0)
          50 -> GenericUniformWindStress(rho_air=1.3, wind_speed, wind_direction)
          99 -> NoWindStress() (default)
    All remaining arguments are converted to np.float32 before use.

    Raises RuntimeError for any other value of type.
    """
    # All conversions are done up front, whichever branch is taken, so
    # arguments that cannot be converted raise before any object is built.
    np.int32(type)
    tau0_f32 = np.float32(tau0)
    rho_f32 = np.float32(rho)
    rho_air_f32 = np.float32(1.3)  # new parameter
    alpha_f32 = np.float32(alpha)
    xm_f32 = np.float32(xm)
    Rc_f32 = np.float32(Rc)
    x0_f32 = np.float32(x0)
    y0_f32 = np.float32(y0)
    u0_f32 = np.float32(u0)
    v0_f32 = np.float32(v0)
    speed_f32 = np.float32(wind_speed)
    direction_f32 = np.float32(wind_direction)

    if type == 0:
        return WindStress.UniformAlongShoreWindStress(
            tau0=tau0_f32, rho=rho_f32, alpha=alpha_f32)
    if type == 1:
        return WindStress.BellShapedAlongShoreWindStress(
            xm=xm_f32, tau0=tau0_f32, rho=rho_f32, alpha=alpha_f32)
    if type == 2:
        return WindStress.MovingCycloneWindStress(
            Rc=Rc_f32, x0=x0_f32, y0=y0_f32, u0=u0_f32, v0=v0_f32)
    if type == 50:
        return WindStress.GenericUniformWindStress(
            rho_air=rho_air_f32, wind_speed=speed_f32,
            wind_direction=direction_f32)
    if type == 99:
        return WindStress.NoWindStress()

    raise RuntimeError('Invalid wind stress type!')
def setParameters(self, f=0, g=9.81, beta=0, r=0, wind=WindStress.WindStress()):
    """
    Store the given model parameters as attributes on this object.

    f: stored as self.f
    g: stored as self.g
    beta: stored as self.beta
    r: stored as self.r
    wind: stored as self.wind. The default WindStress object is created
          once, when the function is defined, and is therefore shared
          between all calls that do not pass wind explicitly.
    """
    self.g, self.f, self.beta, self.r, self.wind = g, f, beta, r, wind
def getWind(source_url_list, timestep_indices, timesteps, x0, x1, y0, y1):
    """
    Read the 'Uwind' and 'Vwind' fields from one or more netCDF sources and
    return them as a WindStress object.

    source_url_list: a single netCDF source or a list of sources; a
                     non-list value is wrapped in a one-element list
    timestep_indices: per source, index into netcdf-array, e.g. [1, 3, 5].
                      If None, indices 0 .. len(timesteps[i])-1 are used
                      for source i.
    timesteps: per source, time at timestep, e.g. [1800, 3600, 7200]
    x0, x1, y0, y1: the fields are read with the slices [y0:y1, x0:x1]

    Returns WindStress.WindStress(t, X, Y) where t is the flattened copy of
    timesteps, and X / Y are the float32 'Uwind' / 'Vwind' data of all
    sources concatenated along the first axis, with masked values set to 0.

    Raises AssertionError if the number of sources differs from
    len(timesteps).
    """
    if not isinstance(source_url_list, list):
        source_url_list = [source_url_list]

    num_files = len(source_url_list)

    assert (num_files == len(timesteps)), str(num_files) + ' vs ' + str(
        len(timesteps))

    if timestep_indices is None:
        timestep_indices = [range(len(timesteps[i])) for i in range(num_files)]

    u_wind_list = []
    v_wind_list = []
    for i, source_url in enumerate(source_url_list):
        # Open outside the try block: if opening fails there is nothing to
        # close, and the original error must not be masked by the cleanup.
        ncfile = Dataset(source_url)
        try:
            u_wind_i = ncfile.variables['Uwind'][timestep_indices[i], y0:y1, x0:x1]
            v_wind_i = ncfile.variables['Vwind'][timestep_indices[i], y0:y1, x0:x1]
        finally:
            ncfile.close()

        # np.ma.filled handles both masked arrays and plain ndarrays
        u_wind_list.append(np.ma.filled(u_wind_i, 0))
        v_wind_list.append(np.ma.filled(v_wind_i, 0))

    u_wind = np.concatenate(u_wind_list).astype(np.float32)
    v_wind = np.concatenate(v_wind_list).astype(np.float32)

    wind_source = WindStress.WindStress(t=np.ravel(timesteps).copy(),
                                        X=u_wind,
                                        Y=v_wind)

    return wind_source
def init(self, driftersPerOceanModel=1):
    """
    Create all ocean model particles with their wind forcing and drifters.

    driftersPerOceanModel: number of drifters attached to each particle.
        The entry with index self.numParticles is the observation and
        always gets exactly one drifter.

    For each of the self.numParticles + 1 entries a random wind direction
    in [0, 360) is drawn, a CDKLM16 simulator with a constant-in-space wind
    field is created, and a GPUDrifterCollection with normally distributed
    initial positions is attached. Finally the current observation of the
    true drifters is added to the observation array.
    """
    self.windSpeed = 2.0
    self.directions = np.random.rand(self.numParticles + 1) * 360
    self.windX, self.windY = self.XandYfromDirections(self.directions)

    self.driftersPerOceanModel = driftersPerOceanModel

    self.windT = np.zeros((1), dtype=np.float32)

    observation_index = self.numParticles
    unit_field = np.ones((2, 2), dtype=np.float32)

    for member in range(observation_index + 1):
        # Wind field for this member: one time level, constant in space
        wind_field_x = [self.windX[member] * unit_field]
        wind_field_y = [self.windY[member] * unit_field]
        member_wind = WindStress.WindStress(self.windT, wind_field_x, wind_field_y)

        self.particles[member] = CDKLM16.CDKLM16(
            self.gpu_ctx,
            self.base_eta, self.base_hu, self.base_hv,
            self.base_H,
            self.nx, self.ny, self.dx, self.dy, self.dt,
            self.g, self.f, self.r,
            wind_stress=member_wind,
            boundary_conditions=self.boundaryConditions,
            write_netcdf=False)

        # All particles done, only the observation is left, and for the
        # observation we only use one drifter, regardless of the number
        # in the other particles.
        if member == observation_index:
            num_drifters = 1
        else:
            num_drifters = driftersPerOceanModel

        member_drifters = GPUDrifterCollection.GPUDrifterCollection(
            self.gpu_ctx, num_drifters,
            observation_variance=self.observation_variance,
            boundaryConditions=self.boundaryConditions,
            domain_size_x=self.nx * self.dx,
            domain_size_y=self.ny * self.dy)

        start_positions = np.random.multivariate_normal(
            self.midPoint, self.initialization_cov_drifters, num_drifters)
        member_drifters.setDrifterPositions(start_positions)
        self.particles[member].attachDrifters(member_drifters)

    # Put the initial positions into the observation array
    self._addObservation(self.observeTrueDrifters())
    print("Added init to observation array")
def resample(self, newSampleIndices, reinitialization_variance):
    """
    Resample the particles according to newSampleIndices.

    newSampleIndices: for each particle i, the index of the particle whose
        wind direction, drifter positions and ocean state it takes over
    reinitialization_variance: passed as the scale argument of
        np.random.normal when perturbing the copied wind direction
        (NOTE(review): np.random.normal interprets this value as a
        standard deviation, not a variance - confirm intent)

    For every particle a new wind direction is drawn around the direction
    of its source particle and wrapped into [0, 360). The ocean states of
    the source particles are first downloaded for all particles, and only
    afterwards uploaded, so that no state is overwritten before it has
    been copied.

    Entries of self.directions with index >= getNumParticles() keep their
    current value.
    """
    # Return value is not needed here.
    # NOTE(review): call kept in case it has side effects - confirm.
    self.observeTrueDrifters()

    positions = self.observeDrifters()
    windDirection = self.directions

    # Start from a copy (not an uninitialised array) so that entries which
    # are not resampled below keep a well-defined value.
    newWindDirection = windDirection.copy()

    numParticles = self.getNumParticles()
    newPos = np.empty((self.driftersPerOceanModel, 2))
    newOceanStates = [None] * numParticles
    for i in range(numParticles):
        index = newSampleIndices[i]

        # Draw a scalar (no size argument) and wrap it into [0, 360)
        perturbed = np.random.normal(windDirection[index],
                                     reinitialization_variance)
        newWindDirection[i] = perturbed % 360
        newPos[:, :] = positions[index, :]

        wX, wY = self.XandYfromDirections(newWindDirection[i])
        wX = [wX * np.ones((2, 2), dtype=np.float32)]
        wY = [wY * np.ones((2, 2), dtype=np.float32)]
        newWindInstance = WindStress.WindStress(self.windT, wX, wY)

        # Download index's ocean state:
        eta0, hu0, hv0 = self.particles[index].download()
        eta1, hu1, hv1 = self.particles[index].downloadPrevTimestep()
        newOceanStates[i] = (eta0, hu0, hv0, eta1, hu1, hv1)

        self.particles[i].wind_stress = newWindInstance
        self.particles[i].drifters.setDrifterPositions(newPos)

    self.directions = newWindDirection.copy()

    # New loop for transferring the correct ocean states back up to the GPU:
    for i in range(numParticles):
        self.particles[i].upload(*newOceanStates[i])
def fromfilename(cls, gpu_ctx, filename, cont_write_netcdf=True, use_lcg=False, new_netcdf_filename=None):
    """
    Initialize and hotstart simulation from nc-file.

    gpu_ctx: GPU context, forwarded unchanged to the simulator constructor
    filename: Continue simulation based on parameters and last timestep in this file
    cont_write_netcdf: Continue to write the results after each superstep to a new netCDF file
    use_lcg: Forwarded to the constructor as 'use_lcg'
    new_netcdf_filename: If we want to continue to write netcdf, we should use this filename.
                         Automatically generated if None.

    Returns cls(**sim_params), where sim_params is assembled from the file.

    Raises AssertionError if the 'simulator_short' attribute of the file
    does not match cls.__name__.
    """
    # open nc-file
    sim_reader = SimReader.SimNetCDFReader(filename, ignore_ghostcells=False)
    sim_name = str(sim_reader.get('simulator_short'))
    assert sim_name == cls.__name__, \
        "Trying to initialize a " + \
        cls.__name__ + " simulator with netCDF file based on " \
        + sim_name + " results."

    # read the most recent state
    H = sim_reader.getH()

    # get last timestep (including simulation time of last timestep)
    eta0, hu0, hv0, time0 = sim_reader.getLastTimeStep()

    # For some reason, some old netcdf had 3-dimensional bathymetry.
    # This fix ensures that we only use a valid H
    if len(H.shape) == 3:
        print("norm diff H: ", np.linalg.norm(H[0, :, :] - H[1, :, :]))
        H = H[0, :, :]

    # Set simulation parameters
    # NOTE(review): every key below must be accepted by the constructor of
    # cls, including 'y_zero_reference_cell' - confirm against cls.__init__.
    sim_params = {
        'gpu_ctx': gpu_ctx,
        'eta0': eta0,
        'hu0': hu0,
        'hv0': hv0,
        'H': H,
        'nx': sim_reader.get("nx"),
        'ny': sim_reader.get("ny"),
        'dx': sim_reader.get("dx"),
        'dy': sim_reader.get("dy"),
        'dt': sim_reader.get("dt"),
        'g': sim_reader.get("g"),
        'f': sim_reader.get("coriolis_force"),
        'r': sim_reader.get("bottom_friction_r"),
        't': time0,
        'theta': sim_reader.get("minmod_theta"),
        'rk_order': sim_reader.get("time_integrator"),
        'coriolis_beta': sim_reader.get("coriolis_beta"),
        'y_zero_reference_cell': sim_reader.get("y_zero_reference_cell"),
        'write_netcdf': cont_write_netcdf,
        'use_lcg': use_lcg,
        'netcdf_filename': new_netcdf_filename
    }

    # Wind stress: best effort. If the type cannot be read or is not a valid
    # type, fall back to the default WindStress object. Only Exception is
    # caught so that KeyboardInterrupt/SystemExit still propagate.
    try:
        wind_stress_type = sim_reader.get("wind_stress_type")
        wind = Common.WindStressParams(type=wind_stress_type)
    except Exception:
        wind = WindStress.WindStress()
    sim_params['wind_stress'] = wind

    # Boundary conditions: read the four entries once and forward them in
    # the order they are stored.
    bc = sim_reader.getBC()
    sim_params['boundary_conditions'] = Common.BoundaryConditions(
        bc[0], bc[1], bc[2], bc[3],
        sim_reader.getBCSpongeCells())

    # Model errors (the flag is compared against the string 'True')
    if sim_reader.has('small_scale_perturbation'):
        sim_params['small_scale_perturbation'] = sim_reader.get('small_scale_perturbation') == 'True'
        if sim_params['small_scale_perturbation']:
            sim_params['small_scale_perturbation_amplitude'] = sim_reader.get('small_scale_perturbation_amplitude')
            sim_params['small_scale_perturbation_interpolation_factor'] = sim_reader.get('small_scale_perturbation_interpolation_factor')

    # Data assimilation parameters:
    if sim_reader.has('model_time_step'):
        sim_params['model_time_step'] = sim_reader.get('model_time_step')

    return cls(**sim_params)
def __init__(self, \
             gpu_ctx, \
             eta0, H, hu0, hv0, \
             nx, ny, \
             dx, dy, dt, \
             g, f=0.0, r=0.0, \
             t=0.0, \
             theta=1.3, use_rk2=True, coriolis_beta=0.0, \
             y_zero_reference_cell = 0, \
             wind_stress=WindStress.WindStress(), \
             boundary_conditions=Common.BoundaryConditions(), \
             write_netcdf=False, \
             comm=None, \
             ignore_ghostcells=False, \
             offset_x=0, offset_y=0, \
             flux_slope_eps = 1.0e-1, \
             depth_cutoff = 1.0e-5, \
             block_width=32, block_height=16):
    """
    Initialization routine for the KP07 simulator: sets up the domain,
    compiles the CUDA kernel, uploads bathymetry and initial conditions,
    and optionally creates a netCDF writer.

    gpu_ctx: GPU context used to fetch/compile the CUDA kernel
    eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
    hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
    hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
    H: Depth from equilibrium defined on cell corners, (nx+5)*(ny+5) corners
       (NOTE(review): this routine uses two ghost cells in each direction -
       confirm the array sizes stated above)
    nx: Number of cells along x-axis
    ny: Number of cells along y-axis
    dx: Grid cell spacing along x-axis (20 000 m)
    dy: Grid cell spacing along y-axis (20 000 m)
    dt: Size of each timestep (90 s)
    g: Gravitational acceleration (9.81 m/s^2)
    f: Coriolis parameter (1.2e-4 s^1), effectively as f = f + beta*y
    r: Bottom friction coefficient (2.4e-3 m/s)
    t: Start simulation at time t
    theta: MINMOD theta used the reconstructions of the derivatives in the numerical scheme
    use_rk2: Boolean if to use 2nd order Runge-Kutta (false -> 1st order forward Euler)
    coriolis_beta: Coriolis linear factor -> f = f + beta*(y-y_0)
    y_zero_reference_cell: The cell representing y_0 in the above, defined as the lower face of the cell.
    wind_stress: Wind stress parameters
    boundary_conditions: Boundary condition object
    write_netcdf: Write the results after each superstep to a netCDF file
    comm: MPI communicator
    ignore_ghostcells: Forwarded to the base class and to the netCDF writer
    offset_x, offset_y: Forwarded to the base class and to the netCDF writer
    depth_cutoff: Used for defining dry cells
    flux_slope_eps: Used for desingularization with dry cells
    block_width, block_height: Passed as defines to the CUDA kernel and to the base class
    """

    ghost_cells_x = 2
    ghost_cells_y = 2
    # Shift the reference cell by 2.0 (the same value as ghost_cells_y)
    y_zero_reference_cell = 2.0 + y_zero_reference_cell

    # Boundary conditions
    self.boundary_conditions = boundary_conditions

    # Extend the computational domain if the boundary conditions
    # require it
    if (boundary_conditions.isSponge()):
        # spongeCells is indexed as (north, east, south, west), matching
        # the interior_domain_indices convention below
        nx = nx + boundary_conditions.spongeCells[
            1] + boundary_conditions.spongeCells[3] - 2 * ghost_cells_x
        ny = ny + boundary_conditions.spongeCells[
            0] + boundary_conditions.spongeCells[2] - 2 * ghost_cells_y
        y_zero_reference_cell = boundary_conditions.spongeCells[
            2] + y_zero_reference_cell

    self.use_rk2 = use_rk2
    # True -> 2 (2nd order Runge-Kutta), False -> 1 (forward Euler)
    rk_order = np.int32(use_rk2 + 1)
    A = None
    super(KP07, self).__init__(gpu_ctx, \
                               nx, ny, \
                               ghost_cells_x, \
                               ghost_cells_y, \
                               dx, dy, dt, \
                               g, f, r, A, \
                               t, \
                               theta, rk_order, \
                               coriolis_beta, \
                               y_zero_reference_cell, \
                               wind_stress, \
                               write_netcdf, \
                               ignore_ghostcells, \
                               offset_x, offset_y, \
                               comm, \
                               block_width, block_height)

    # Index range for interior domain (north, east, south, west)
    # so that interior domain of eta is
    # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0], \
    #     self.interior_domain_indices[3]:self.interior_domain_indices[1] ]
    self.interior_domain_indices = np.array([-2, -2, 2, 2])
    self._set_interior_domain_from_sponge_cells()

    # The ocean simulators and the swashes cases are defined on
    # completely different scales. We therefore specify a different
    # desingularization parameter if we run a swashes case.
    # Typical values:
    #ifndef SWASHES
    #define KPSIMULATOR_FLUX_SLOPE_EPS   1e-1f
    #define KPSIMULATOR_FLUX_SLOPE_EPS_4 1.0e-4f
    #else
    #define KPSIMULATOR_FLUX_SLOPE_EPS   1.0e-4f
    #define KPSIMULATOR_FLUX_SLOPE_EPS_4 1.0e-16f
    #endif
    # The numeric defines are passed as strings with an 'f' suffix
    defines = {
        'block_width': block_width,
        'block_height': block_height,
        'KPSIMULATOR_FLUX_SLOPE_EPS': str(flux_slope_eps) + 'f',
        'KPSIMULATOR_FLUX_SLOPE_EPS_4': str(flux_slope_eps**4) + 'f',
        'KPSIMULATOR_DEPTH_CUTOFF': str(depth_cutoff) + 'f'
    }

    #Get kernels
    self.kp07_kernel = gpu_ctx.get_kernel(
        "KP07_kernel.cu",
        defines=defines,
        compile_args={  # default, fast_math, optimal
            'options': [
                "--ftz=true",  # false,   true, true
                "--prec-div=false",  # true,    false, false,
                "--prec-sqrt=false",  # true,    false, false
                "--fmad=false"
            ]  # true, true, false

            #'options': ["--use_fast_math"]
            #'options': ["--generate-line-info"],
            #nvcc_options=["--maxrregcount=39"],
            #'arch': "compute_50",
            #'code': "sm_50"
        },
        jit_compile_args={
            #jit_options=[(cuda.jit_option.MAX_REGISTERS, 39)]
        })

    # Get CUDA functions and define data types for prepared_{async_}call()
    self.swe_2D = self.kp07_kernel.get_function("swe_2D")
    self.swe_2D.prepare("iifffffffffiPiPiPiPiPiPiPiPiiiiif")
    self.update_wind_stress(self.kp07_kernel, self.swe_2D)

    # Upload Bathymetry
    self.bathymetry = Common.Bathymetry(self.gpu_ctx, self.gpu_stream, \
                                        nx, ny, ghost_cells_x, ghost_cells_y, H, boundary_conditions)

    # Adjust eta for possible dry states
    # (eta0 is clamped from below by -Hm, element [1] of the downloaded bathymetry)
    Hm = self.downloadBathymetry()[1]
    eta0 = np.maximum(eta0, -Hm)

    #Create data by uploading to device
    self.gpu_data = Common.SWEDataArakawaA(self.gpu_stream, nx, ny,
                                           ghost_cells_x, ghost_cells_y,
                                           eta0, hu0, hv0)

    self.bc_kernel = Common.BoundaryConditionsArakawaA(gpu_ctx, \
                                                       self.nx, \
                                                       self.ny, \
                                                       ghost_cells_x, \
                                                       ghost_cells_y, \
                                                       self.boundary_conditions)

    if self.write_netcdf:
        self.sim_writer = SimWriter.SimNetCDFWriter(self, ignore_ghostcells=self.ignore_ghostcells, \
                                                    offset_x=self.offset_x, offset_y=self.offset_y)
def __init__(self, \
             gpu_ctx, \
             eta0, hu0, hv0, H, \
             nx, ny, \
             dx, dy, dt, \
             g, f, r, \
             subsample_f=10, \
             angle=np.array([[0]], dtype=np.float32), \
             subsample_angle=10, \
             latitude=None, \
             t=0.0, \
             theta=1.3, rk_order=2, \
             coriolis_beta=0.0, \
             max_wind_direction_perturbation = 0, \
             wind_stress=WindStress.WindStress(), \
             boundary_conditions=Common.BoundaryConditions(), \
             boundary_conditions_data=Common.BoundaryConditionsData(), \
             small_scale_perturbation=False, \
             small_scale_perturbation_amplitude=None, \
             small_scale_perturbation_interpolation_factor = 1, \
             model_time_step=None,
             reportGeostrophicEquilibrium=False, \
             use_lcg=False, \
             write_netcdf=False, \
             comm=None, \
             local_particle_id=0, \
             super_dir_name=None, \
             netcdf_filename=None, \
             ignore_ghostcells=False, \
             courant_number=0.8, \
             offset_x=0, offset_y=0, \
             flux_slope_eps = 1.0e-1, \
             desingularization_eps = 1.0e-1, \
             depth_cutoff = 1.0e-5, \
             block_width=12, block_height=32, num_threads_dt=256,
             block_width_model_error=16, block_height_model_error=16):
    """
    Initialization routine for the CDKLM16 simulator: sets up the domain,
    compiles the CUDA kernels, uploads bathymetry and initial conditions,
    creates the angle and Coriolis textures, and optionally sets up the
    small scale model error and the netCDF writer.

    gpu_ctx: GPU context used to fetch/compile the CUDA kernels
    eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
    hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
    hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
    H: Depth from equilibrium defined on cell corners, (nx+5)*(ny+5) corners
       (NOTE(review): this routine uses two ghost cells in each direction -
       confirm the array sizes stated above)
    nx: Number of cells along x-axis
    ny: Number of cells along y-axis
    dx: Grid cell spacing along x-axis (20 000 m)
    dy: Grid cell spacing along y-axis (20 000 m)
    dt: Size of each timestep (90 s). If dt <= 0, updateDt() is called at the end of the initialization.
    g: Gravitational acceleration (9.81 m/s^2)
    f: Coriolis parameter (1.2e-4 s^1), effectively as f = f + beta*y
    r: Bottom friction coefficient (2.4e-3 m/s)
    subsample_f: Subsample the coriolis f when creating texture by factor
    angle: Angle of rotation from North to y-axis as a texture (cuda.Array) or numpy array (in radians)
    subsample_angle: Subsample the angles given as input when creating texture by factor
    latitude: Specify latitude. This will override any f and beta plane already set (in radians)
    t: Start simulation at time t
    theta: MINMOD theta used the reconstructions of the derivatives in the numerical scheme
    rk_order: Order of Runge Kutta method {1,2*,3}
    coriolis_beta: Coriolis linear factor -> f = f + beta*(y-y_0)
    max_wind_direction_perturbation: Large-scale model error emulation by per-time-step perturbation of wind direction by +/- max_wind_direction_perturbation (degrees)
    wind_stress: Wind stress parameters
    boundary_conditions: Boundary condition object
    boundary_conditions_data: Forwarded to Common.BoundaryConditionsArakawaA
    small_scale_perturbation: Boolean value for applying a stochastic model error
    small_scale_perturbation_amplitude: Amplitude (q0 coefficient) for model error
    small_scale_perturbation_interpolation_factor: Width factor for correlation in model error
    model_time_step: The size of a data assimilation model step (default same as dt)
    reportGeostrophicEquilibrium: Calculate the Geostrophic Equilibrium variables for each superstep
    use_lcg: Use LCG as the random number generator. Default is False, which means using curand.
    write_netcdf: Write the results after each superstep to a netCDF file
    comm: MPI communicator
    local_particle_id: Local (for each MPI process) particle id
    super_dir_name: Directory to write netcdf files to
    netcdf_filename: Use this filename. (If not defined, a filename will be generated by SimWriter.)
    ignore_ghostcells: Forwarded to the base class and to the netCDF writer
    courant_number: Stored as self.courant_number
    offset_x, offset_y: Forwarded to the base class and to the netCDF writer
    desingularization_eps: Used for desingularizing hu/h
    flux_slope_eps: Used for setting zero flux for symmetric Riemann fan
    depth_cutoff: Used for defining dry cells
    block_width, block_height: Passed as defines to the CUDA kernels and to the base class
    num_threads_dt: Passed as NUM_THREADS to the max_dt.cu kernels
    block_width_model_error, block_height_model_error: Block size forwarded to OceanStateNoise

    Raises AssertionError if rk_order is not 1, 2 or 3, or if rk_order is 3
    and r is non-zero. Raises RuntimeError if both latitude and a non-zero f
    are given, if a beta plane is combined with a non-constant angle, or if
    f is neither scalar nor of the same shape as eta0.
    """

    self.logger = logging.getLogger(__name__)

    # Both bounds must hold ('and'); with 'or' the check can never fail.
    assert (rk_order < 4 and rk_order > 0
            ), "Only 1st, 2nd and 3rd order Runge Kutta supported"

    if (rk_order == 3):
        assert (r == 0.0
                ), "3rd order Runge Kutta supported only without friction"

    # Sort out internally represented ghost_cells in the presence of given
    # boundary conditions
    ghost_cells_x = 2
    ghost_cells_y = 2

    #Coriolis at "first" cell
    x_zero_reference_cell = ghost_cells_x
    y_zero_reference_cell = ghost_cells_y  # In order to pass it to the super constructor

    # Boundary conditions
    self.boundary_conditions = boundary_conditions

    #Compensate f for reference cell (first cell in internal of domain)
    north = np.array([np.sin(angle[0, 0]), np.cos(angle[0, 0])])
    f = f - coriolis_beta * (x_zero_reference_cell * dx * north[0] +
                             y_zero_reference_cell * dy * north[1])

    # f has been compensated above, so the reference cell is now the origin
    x_zero_reference_cell = 0
    y_zero_reference_cell = 0

    A = None
    self.max_wind_direction_perturbation = max_wind_direction_perturbation
    super(CDKLM16, self).__init__(gpu_ctx, \
                                  nx, ny, \
                                  ghost_cells_x, \
                                  ghost_cells_y, \
                                  dx, dy, dt, \
                                  g, f, r, A, \
                                  t, \
                                  theta, rk_order, \
                                  coriolis_beta, \
                                  y_zero_reference_cell, \
                                  wind_stress, \
                                  write_netcdf, \
                                  ignore_ghostcells, \
                                  offset_x, offset_y, \
                                  comm, \
                                  block_width, block_height,
                                  local_particle_id=local_particle_id)

    # Index range for interior domain (north, east, south, west)
    # so that interior domain of eta is
    # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0], \
    #     self.interior_domain_indices[3]:self.interior_domain_indices[1] ]
    self.interior_domain_indices = np.array([-2, -2, 2, 2])

    # Floating point defines are passed as strings with an 'f' suffix
    defines = {
        'block_width': block_width,
        'block_height': block_height,
        'KPSIMULATOR_DESING_EPS': "{:.12f}f".format(desingularization_eps),
        'KPSIMULATOR_FLUX_SLOPE_EPS': "{:.12f}f".format(flux_slope_eps),
        'KPSIMULATOR_DEPTH_CUTOFF': "{:.12f}f".format(depth_cutoff),
        'THETA': "{:.12f}f".format(self.theta),
        'RK_ORDER': int(self.rk_order),
        'NX': int(self.nx),
        'NY': int(self.ny),
        'DX': "{:.12f}f".format(self.dx),
        'DY': "{:.12f}f".format(self.dy),
        'GRAV': "{:.12f}f".format(self.g),
        'FRIC': "{:.12f}f".format(self.r)
    }

    #Get kernels
    self.kernel = gpu_ctx.get_kernel(
        "CDKLM16_kernel.cu",
        defines=defines,
        compile_args={  # default, fast_math, optimal
            'options': [
                "--ftz=true",  # false,   true, true
                "--prec-div=false",  # true,    false, false,
                "--prec-sqrt=false",  # true,    false, false
                "--fmad=false"
            ]  # true, true, false

            #'options': ["--use_fast_math"]
            #'options': ["--generate-line-info"],
            #nvcc_options=["--maxrregcount=39"],
            #'arch': "compute_50",
            #'code': "sm_50"
        },
        jit_compile_args={
            #jit_options=[(cuda.jit_option.MAX_REGISTERS, 39)]
        })

    # Get CUDA functions and define data types for prepared_{async_}call()
    self.cdklm_swe_2D = self.kernel.get_function("cdklm_swe_2D")
    self.cdklm_swe_2D.prepare("fiPiPiPiPiPiPiPiPiffi")
    self.update_wind_stress(self.kernel, self.cdklm_swe_2D)

    # CUDA functions for finding max time step size:
    self.num_threads_dt = num_threads_dt
    self.num_blocks_dt = np.int32(self.global_size[0] * self.global_size[1])
    self.update_dt_kernels = gpu_ctx.get_kernel("max_dt.cu",
                                                defines={
                                                    'block_width': block_width,
                                                    'block_height': block_height,
                                                    'NUM_THREADS': self.num_threads_dt
                                                })
    self.per_block_max_dt_kernel = self.update_dt_kernels.get_function(
        "per_block_max_dt")
    self.per_block_max_dt_kernel.prepare("iifffPiPiPiPifPi")
    self.max_dt_reduction_kernel = self.update_dt_kernels.get_function(
        "max_dt_reduction")
    self.max_dt_reduction_kernel.prepare("iPP")

    # Bathymetry
    self.bathymetry = Common.Bathymetry(gpu_ctx, self.gpu_stream, nx, ny,
                                        ghost_cells_x, ghost_cells_y, H,
                                        boundary_conditions)

    # Adjust eta for possible dry states
    # (eta0 is clamped from below by -Hm, element [1] of the downloaded bathymetry)
    Hm = self.downloadBathymetry()[1]
    eta0 = np.maximum(eta0, -Hm)

    # Create data by uploading to device
    self.gpu_data = Common.SWEDataArakawaA(self.gpu_stream, nx, ny,
                                           ghost_cells_x, ghost_cells_y,
                                           eta0, hu0, hv0)

    # Allocate memory for calculating maximum timestep
    host_dt = np.zeros((self.global_size[1], self.global_size[0]),
                       dtype=np.float32)
    self.device_dt = Common.CUDAArray2D(self.gpu_stream,
                                        self.global_size[0],
                                        self.global_size[1], 0, 0, host_dt)
    host_max_dt_buffer = np.zeros((1, 1), dtype=np.float32)
    self.max_dt_buffer = Common.CUDAArray2D(self.gpu_stream, 1, 1, 0, 0,
                                            host_max_dt_buffer)
    self.courant_number = courant_number

    ## Allocating memory for geostrophical equilibrium variables
    self.reportGeostrophicEquilibrium = np.int32(
        reportGeostrophicEquilibrium)
    self.geoEq_uxpvy = None
    self.geoEq_Kx = None
    self.geoEq_Ly = None
    if self.reportGeostrophicEquilibrium:
        dummy_zero_array = np.zeros(
            (ny + 2 * ghost_cells_y, nx + 2 * ghost_cells_x),
            dtype=np.float32,
            order='C')
        self.geoEq_uxpvy = Common.CUDAArray2D(self.gpu_stream, nx, ny,
                                              ghost_cells_x, ghost_cells_y,
                                              dummy_zero_array)
        self.geoEq_Kx = Common.CUDAArray2D(self.gpu_stream, nx, ny,
                                           ghost_cells_x, ghost_cells_y,
                                           dummy_zero_array)
        self.geoEq_Ly = Common.CUDAArray2D(self.gpu_stream, nx, ny,
                                           ghost_cells_x, ghost_cells_y,
                                           dummy_zero_array)

    self.constant_equilibrium_depth = np.max(H)

    self.bc_kernel = Common.BoundaryConditionsArakawaA(gpu_ctx, \
                                                       self.nx, \
                                                       self.ny, \
                                                       ghost_cells_x, \
                                                       ghost_cells_y, \
                                                       self.boundary_conditions, \
                                                       boundary_conditions_data, \
                                                       )

    def subsample_texture(data, factor):
        # Linearly interpolate data (defined at cell centers of the unit
        # square) onto a grid that is coarser by 'factor' in each
        # direction, but has at least 2 cells per direction.
        ny, nx = data.shape
        dx, dy = 1 / nx, 1 / ny
        I = interp2d(np.linspace(0.5 * dx, 1 - 0.5 * dx, nx),
                     np.linspace(0.5 * dy, 1 - 0.5 * dy, ny),
                     data,
                     kind='linear')

        new_nx, new_ny = max(2, nx // factor), max(2, ny // factor)
        new_dx, new_dy = 1 / new_nx, 1 / new_ny
        x_new = np.linspace(0.5 * new_dx, 1 - 0.5 * new_dx, new_nx)
        y_new = np.linspace(0.5 * new_dy, 1 - 0.5 * new_dy, new_ny)
        return I(x_new, y_new)

    # Texture for angle
    self.angle_texref = self.kernel.get_texref("angle_tex")
    if isinstance(angle, cuda.Array):
        # angle is already a texture, so we just set the texture reference
        self.angle_texref.set_array(angle)
    else:
        #Upload data to GPU and bind to texture reference
        if (subsample_angle and angle.size >= eta0.size):
            self.logger.info("Subsampling angle texture by factor " +
                             str(subsample_angle))
            self.logger.warning(
                "This will give inaccurate angle along the border!")
            angle = subsample_texture(angle, subsample_angle)

        self.angle_texref.set_array(
            cuda.np_to_array(np.ascontiguousarray(angle, dtype=np.float32),
                             order="C"))

    # Set texture parameters
    self.angle_texref.set_filter_mode(
        cuda.filter_mode.LINEAR)  #bilinear interpolation
    self.angle_texref.set_address_mode(
        0, cuda.address_mode.CLAMP)  #no indexing outside domain
    self.angle_texref.set_address_mode(1, cuda.address_mode.CLAMP)
    self.angle_texref.set_flags(
        cuda.TRSF_NORMALIZED_COORDINATES)  #Use [0, 1] indexing

    # Texture for coriolis f
    self.coriolis_texref = self.kernel.get_texref("coriolis_f_tex")

    # Create the CPU coriolis
    if (latitude is not None):
        if (self.f != 0.0):
            raise RuntimeError(
                "Cannot specify both latitude and f. Make your mind up.")
        coriolis_f, _ = OceanographicUtilities.calcCoriolisParams(latitude)
        coriolis_f = coriolis_f.astype(np.float32)
    else:
        if (self.coriolis_beta != 0.0):
            if (angle.size != 1):
                raise RuntimeError(
                    "non-constant angle cannot be combined with beta plane model (makes no sense)"
                )
            #Generate coordinates for all cells, including ghost cells from center to center
            # [-3/2dx, nx+3/2dx] for ghost_cells_x == 2
            x = np.linspace((-self.ghost_cells_x + 0.5) * self.dx,
                            (self.nx + self.ghost_cells_x - 0.5) * self.dx,
                            self.nx + 2 * self.ghost_cells_x)
            y = np.linspace((-self.ghost_cells_y + 0.5) * self.dy,
                            (self.ny + self.ghost_cells_y - 0.5) * self.dy,
                            self.ny + 2 * self.ghost_cells_y)
            self.logger.info(
                "Using beta plane to create Coriolis f texture ({:d}x{:d} cells)"
                .format(x.size, y.size))
            x, y = np.meshgrid(x, y)
            n = x * np.sin(angle[0, 0]) + y * np.cos(
                angle[0, 0])  #North vector
            coriolis_f = self.f + self.coriolis_beta * n
        else:
            if (self.f.size == 1):
                coriolis_f = np.array([[self.f]], dtype=np.float32)
            elif (self.f.shape == eta0.shape):
                coriolis_f = np.array(self.f, dtype=np.float32)
            else:
                raise RuntimeError(
                    "The shape of f should match up with eta or be scalar."
                )

    if (subsample_f and coriolis_f.size >= eta0.size):
        self.logger.info("Subsampling coriolis texture by factor " +
                         str(subsample_f))
        self.logger.warning(
            "This will give inaccurate coriolis along the border!")
        coriolis_f = subsample_texture(coriolis_f, subsample_f)

    #Upload data to GPU and bind to texture reference
    self.coriolis_texref.set_array(
        cuda.np_to_array(np.ascontiguousarray(coriolis_f,
                                              dtype=np.float32),
                         order="C"))

    # Set texture parameters
    self.coriolis_texref.set_filter_mode(
        cuda.filter_mode.LINEAR)  #bilinear interpolation
    self.coriolis_texref.set_address_mode(
        0, cuda.address_mode.CLAMP)  #no indexing outside domain
    self.coriolis_texref.set_address_mode(1, cuda.address_mode.CLAMP)
    self.coriolis_texref.set_flags(
        cuda.TRSF_NORMALIZED_COORDINATES)  #Use [0, 1] indexing

    # Small scale perturbation:
    self.small_scale_perturbation = small_scale_perturbation
    self.small_scale_model_error = None
    self.small_scale_perturbation_interpolation_factor = small_scale_perturbation_interpolation_factor
    if small_scale_perturbation:
        self.small_scale_model_error = OceanStateNoise.OceanStateNoise.fromsim(
            self,
            soar_q0=small_scale_perturbation_amplitude,
            interpolation_factor=small_scale_perturbation_interpolation_factor,
            use_lcg=use_lcg,
            block_width=block_width_model_error,
            block_height=block_height_model_error)

    # Data assimilation model step size
    self.model_time_step = model_time_step
    self.total_time_steps = 0
    if model_time_step is None:
        self.model_time_step = self.dt

    if self.write_netcdf:
        self.sim_writer = SimWriter.SimNetCDFWriter(self, super_dir_name=super_dir_name, filename=netcdf_filename, \
                                                    ignore_ghostcells=self.ignore_ghostcells, offset_x=self.offset_x, offset_y=self.offset_y)

    # Update timestep if dt is given as zero
    if self.dt <= 0:
        self.updateDt()
def __init__(self, \
             gpu_ctx, \
             H, eta0, hu0, hv0, \
             nx, ny, \
             dx, dy, dt, \
             g, f, r, \
             t=0.0, \
             coriolis_beta=0.0, \
             y_zero_reference_cell = 1, \
             wind_stress=WindStress.WindStress(), \
             boundary_conditions=Common.BoundaryConditions(), \
             write_netcdf=False, \
             comm=None, \
             ignore_ghostcells=False, \
             offset_x=0, offset_y=0, \
             block_width=16, block_height=16):
    """
    Initialization routine for the FBL simulator: sets up the domain,
    compiles the CUDA step kernel, uploads depth and initial conditions,
    and optionally creates a netCDF writer.

    gpu_ctx: GPU context used to fetch/compile the CUDA kernel
    H: Water depth incl ghost cells, (nx+2)*(ny+2) cells
    eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
    hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
    hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+3) cells
    nx: Number of cells along x-axis
    ny: Number of cells along y-axis
    dx: Grid cell spacing along x-axis (20 000 m)
    dy: Grid cell spacing along y-axis (20 000 m)
    dt: Size of each timestep (90 s)
    g: Gravitational acceleration (9.81 m/s^2)
    f: Coriolis parameter (1.2e-4 s^1), effectively as f = f + beta*y
    r: Bottom friction coefficient (2.4e-3 m/s)
    t: Start simulation at time t (forwarded to the base class)
    coriolis_beta: Coriolis linear factor -> f = f + beta*y
    y_zero_reference_cell: The cell representing y_0 in the above, defined as the lower face of the cell.
    wind_stress: Wind stress parameters
    boundary_conditions: Boundary condition object
    write_netcdf: Write the results after each superstep to a netCDF file
    comm: MPI communicator
    ignore_ghostcells: Forwarded to the base class and to the netCDF writer
    offset_x, offset_y: Forwarded to the base class and to the netCDF writer
    block_width, block_height: Passed as defines to the CUDA kernel and to the base class
    """

    #### THIS MAKES IT POSSIBLE TO GIVE THE OLD INPUT SHAPES TO THE NEW GHOST CELL REGIME: Only valid for benchmarking!
    # Arrays given without ghost cells are copied into the low-index corner
    # of a larger array; the remaining cells are zero (for H: max(H)).
    if (eta0.shape == (ny, nx)):
        new_eta = np.zeros((ny+2, nx+2), dtype=np.float32)
        new_eta[:ny, :nx] = eta0.copy()
        eta0 = new_eta.copy()
    if (H.shape == (ny, nx)):
        new_H = np.ones((ny+2, nx+2), dtype=np.float32)*np.max(H)
        new_H[:ny,:nx] = H.copy()
        H = new_H.copy()
    if (hu0.shape == (ny, nx+1)):
        new_hu = np.zeros((ny+2, nx+1), dtype=np.float32)
        new_hu[:ny, :nx+1] = hu0.copy()
        hu0 = new_hu.copy()
    if (hv0.shape == (ny+1, nx)):
        new_hv = np.zeros((ny+3, nx+2), dtype=np.float32)
        new_hv[:ny+1,:nx] = hv0.copy()
        hv0 = new_hv.copy()

    #Create data by uploading to device
    ghost_cells_x = 1
    ghost_cells_y = 1
    # No-op: the reference cell is used as given (it is only shifted
    # below when sponge boundary conditions are used)
    y_zero_reference_cell = y_zero_reference_cell

    # Index range for interior domain (north, east, south, west)
    # so that interior domain of eta is
    # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0], \
    #     self.interior_domain_indices[3]:self.interior_domain_indices[1] ]
    self.interior_domain_indices = np.array([-1, -1, 1, 1])

    self.boundary_conditions = boundary_conditions
    # Extend the computational domain if sponge boundary conditions are used;
    # spongeCells is indexed as (north, east, south, west)
    if boundary_conditions.isSponge():
        nx = nx - 2 + boundary_conditions.spongeCells[1] + boundary_conditions.spongeCells[3]
        ny = ny - 2 + boundary_conditions.spongeCells[0] + boundary_conditions.spongeCells[2]
        y_zero_reference_cell = y_zero_reference_cell + boundary_conditions.spongeCells[2]

    # These base class parameters are passed as None for this scheme
    rk_order = None
    theta = None
    A = None
    super(FBL, self).__init__(gpu_ctx, \
                              nx, ny, \
                              ghost_cells_x, \
                              ghost_cells_y, \
                              dx, dy, dt, \
                              g, f, r, A, \
                              t, \
                              theta, rk_order, \
                              coriolis_beta, \
                              y_zero_reference_cell, \
                              wind_stress, \
                              write_netcdf, \
                              ignore_ghostcells, \
                              offset_x, offset_y, \
                              comm, \
                              block_width, block_height)
    self._set_interior_domain_from_sponge_cells()

    #Get kernels
    self.step_kernel = gpu_ctx.get_kernel("FBL_step_kernel.cu",
            defines={'block_width': block_width, 'block_height': block_height},
            compile_args={
                'no_extern_c': True,
                'options': ["--use_fast_math"],
                #'options': ["--generate-line-info"],
                #'options': ["--maxrregcount=32"]
                #'arch': "compute_50",
                #'code': "sm_50"
            },
            jit_compile_args={
                #jit_options=[(cuda.jit_option.MAX_REGISTERS, 39)]
            }
            )

    # Get CUDA functions
    self.fblStepKernel = self.step_kernel.get_function("fblStepKernel")

    # Prepare kernel launches
    self.fblStepKernel.prepare("iiffffffffPiPiPiPiif")

    # Set up textures
    self.update_wind_stress(self.step_kernel, self.fblStepKernel)

    # Upload depth and initial conditions to the device
    self.H = Common.CUDAArray2D(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, H)
    self.gpu_data = Common.SWEDataArakawaC(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, eta0, hu0, hv0, fbl=True)

    # Domain including ghost cells
    self.nx_halo = np.int32(nx + 2)
    self.ny_halo = np.int32(ny + 2)

    self.bc_kernel = FBL_boundary_conditions(self.gpu_ctx, \
                                             self.nx, \
                                             self.ny, \
                                             self.boundary_conditions
    )

    # Bit-wise boolean for wall boundary conditions
    # (a boundary value of 1 sets the bit: north 0x01, east 0x02,
    # south 0x04, west 0x08)
    self.wall_bc = np.int32(0)
    if (self.boundary_conditions.north == 1):
        self.wall_bc = self.wall_bc | 0x01
    if (self.boundary_conditions.east == 1):
        self.wall_bc = self.wall_bc | 0x02
    if (self.boundary_conditions.south == 1):
        self.wall_bc = self.wall_bc | 0x04
    if (self.boundary_conditions.west == 1):
        self.wall_bc = self.wall_bc | 0x08

    if self.write_netcdf:
        self.sim_writer = SimWriter.SimNetCDFWriter(self, ignore_ghostcells=self.ignore_ghostcells, \
                                                    staggered_grid=True, \
                                                    offset_x=self.offset_x, offset_y=self.offset_y)
def getWindSourceterm(source_url_list, timestep_indices, timesteps, x0, x1, y0, y1):
    """
    Build a WindStress object from wind velocities stored in netCDF file(s).

    source_url_list: one netCDF file/URL, or a list with one entry per file
    timestep_indices: per file, the indices into the netCDF time axis,
        e.g. [1, 3, 5]. If None, all indices range(len(timesteps[i])) are
        used for file i.
    timesteps: per file, the time at each timestep, e.g. [1800, 3600, 7200].
        Must have one entry per file.
    x0, x1, y0, y1: index bounds of the sub-domain read from the
        'Uwind' and 'Vwind' variables (read as [time, y0:y1, x0:x1])

    Returns a WindStress.WindStress with t=np.ravel(timesteps) and the
    x- and y-components of the wind stress divided by the water density.

    Raises AssertionError if the number of files and the number of entries
    in timesteps differ. Errors from opening or reading a file propagate
    unchanged.
    """
    if not isinstance(source_url_list, list):
        source_url_list = [source_url_list]

    num_files = len(source_url_list)
    assert (num_files == len(timesteps)), str(num_files) + ' vs ' + str(len(timesteps))

    if timestep_indices is None:
        timestep_indices = [range(len(timesteps[i])) for i in range(num_files)]

    u_wind_list = [None] * num_files
    v_wind_list = [None] * num_files
    for i in range(num_files):
        # Open outside the try block: if opening fails there is nothing to
        # close, and the original error must not be masked by the cleanup.
        ncfile = Dataset(source_url_list[i])
        try:
            u_wind_list[i] = ncfile.variables['Uwind'][timestep_indices[i], y0:y1, x0:x1]
            v_wind_list[i] = ncfile.variables['Vwind'][timestep_indices[i], y0:y1, x0:x1]
        finally:
            ncfile.close()

    # Masked values are replaced by zero wind; files are stacked along the
    # first (time) axis.
    u_wind = np.concatenate([u.filled(0) for u in u_wind_list])
    v_wind = np.concatenate([v.filled(0) for v in v_wind_list])

    wind_speed = np.sqrt(np.power(u_wind, 2) + np.power(v_wind, 2))

    # C_drag as defined by Engedahl (1995)
    # (See "Documentation of simple ocean models for use in ensemble predictions.
    # Part II: Benchmark cases" at
    # https://www.met.no/publikasjoner/met-report/met-report-2012 for details.)
    C_drag = np.where(wind_speed < 11, 0.0012, 0.00049 + 0.000065 * wind_speed)

    rho_a = 1.225  # Density of air
    rho_w = 1025   # Density of water

    # Wind stress is then
    #   tau_s = rho_a * C_drag * |W|W
    # and is divided by the water density here.
    wind_stress = C_drag * wind_speed * rho_a / rho_w
    wind_stress_u = wind_stress * u_wind
    wind_stress_v = wind_stress * v_wind

    return WindStress.WindStress(t=np.ravel(timesteps).copy(),
                                 X=wind_stress_u, Y=wind_stress_v)
def __init__(self,
             gpu_ctx,
             H, eta0, hu0, hv0,
             nx, ny,
             dx, dy, dt,
             g, f, r, A=0.0,
             t=0.0,
             coriolis_beta=0.0,
             y_zero_reference_cell = 0,
             wind_stress=WindStress.WindStress(),
             boundary_conditions=Common.BoundaryConditions(),
             write_netcdf=False,
             ignore_ghostcells=False,
             offset_x=0, offset_y=0,
             block_width=16, block_height=16):
    """
    Set up the centered-in-time, centered-in-space (CTCS) simulator on the GPU.

    gpu_ctx: GPU context used to compile and fetch the CUDA kernels
    H: Water depth incl ghost cells, (nx+2)*(ny+2) cells
    eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
    hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
    hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
    nx: Number of cells along x-axis
    ny: Number of cells along y-axis
    dx: Grid cell spacing along x-axis (20 000 m)
    dy: Grid cell spacing along y-axis (20 000 m)
    dt: Size of each timestep (90 s)
    g: Gravitational acceleration (9.81 m/s^2)
    f: Coriolis parameter (1.2e-4 s^1), effectively as f = f + beta*y
    r: Bottom friction coefficient (2.4e-3 m/s)
    A: Eddy viscosity coefficient (O(dx))
    t: Start simulation at time t
    coriolis_beta: Coriolis linear factor -> f = f + beta*(y-y_0)
    y_zero_reference_cell: The cell representing y_0 in the above, defined as the lower face of the cell.
    wind_stress: Wind stress parameters
    boundary_conditions: Boundary condition object
    write_netcdf: Write the results after each superstep to a netCDF file
    ignore_ghostcells, offset_x, offset_y: forwarded to the parent
        constructor and to the netCDF writer
    block_width, block_height: CUDA block size used when compiling the kernels
    """
    # Sort out internally represented ghost_cells in the presence of given
    # boundary conditions
    halo_x = halo_y = 1
    ghost_cells_x = ghost_cells_y = 1
    # Shift the reference cell by one relative to the user-supplied index
    y_zero_reference_cell = y_zero_reference_cell + 1

    self.boundary_conditions = boundary_conditions
    if boundary_conditions.isSponge():
        # spongeCells is indexed [north, east, south, west]
        sponge = boundary_conditions.spongeCells
        nx = nx + sponge[1] + sponge[3] - 2 * ghost_cells_x
        ny = ny + sponge[0] + sponge[2] - 2 * ghost_cells_y
        y_zero_reference_cell = y_zero_reference_cell + sponge[2]

    # self.<parameters> are set in the parent constructor, which takes
    # theta and rk_order positionally; None is passed for both here.
    super(CTCS, self).__init__(gpu_ctx,
                               nx, ny,
                               ghost_cells_x,
                               ghost_cells_y,
                               dx, dy, dt,
                               g, f, r, A,
                               t,
                               None, None,  # theta, rk_order
                               coriolis_beta,
                               y_zero_reference_cell,
                               wind_stress,
                               write_netcdf,
                               ignore_ghostcells,
                               offset_x, offset_y,
                               block_width, block_height)

    # Index range for interior domain (north, east, south, west)
    # so that interior domain of eta is
    # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0],
    #     self.interior_domain_indices[3]:self.interior_domain_indices[1]]
    self.interior_domain_indices = np.array([-1, -1, 1, 1])
    self._set_interior_domain_from_sponge_cells()

    # Get kernels (each call gets its own copy of the defines)
    block_defines = {'block_width': block_width, 'block_height': block_height}
    self.u_kernel = gpu_ctx.get_kernel("CTCS_U_kernel.cu",
                                       defines=dict(block_defines))
    self.v_kernel = gpu_ctx.get_kernel("CTCS_V_kernel.cu",
                                       defines=dict(block_defines))
    self.eta_kernel = gpu_ctx.get_kernel("CTCS_eta_kernel.cu",
                                         defines=dict(block_defines))

    # Get CUDA functions
    self.computeUKernel = self.u_kernel.get_function("computeUKernel")
    self.computeVKernel = self.v_kernel.get_function("computeVKernel")
    self.computeEtaKernel = self.eta_kernel.get_function("computeEtaKernel")

    # Prepare kernel launches
    self.computeUKernel.prepare("iiiifffffffffPiPiPiPiPif")
    self.computeVKernel.prepare("iiiifffffffffPiPiPiPiPif")
    self.computeEtaKernel.prepare("iiffffffffPiPiPi")

    # Set up textures
    self.update_wind_stress(self.u_kernel, self.computeUKernel)
    self.update_wind_stress(self.v_kernel, self.computeVKernel)

    # Create data by uploading to device
    self.H = Common.CUDAArray2D(self.gpu_stream, nx, ny, halo_x, halo_y, H)
    self.gpu_data = Common.SWEDataArakawaC(self.gpu_stream, nx, ny,
                                           halo_x, halo_y,
                                           eta0, hu0, hv0)

    # Global size needs to be larger than the default from parent.__init__:
    # the launch grid here also covers the halo cells.
    blocks_x = int(np.ceil((self.nx+2*halo_x) / float(self.local_size[0])))
    blocks_y = int(np.ceil((self.ny+2*halo_y) / float(self.local_size[1])))
    self.global_size = (blocks_x, blocks_y)

    self.bc_kernel = CTCS_boundary_condition(gpu_ctx,
                                             self.nx,
                                             self.ny,
                                             self.boundary_conditions,
                                             halo_x, halo_y)

    if self.write_netcdf:
        self.sim_writer = SimWriter.SimNetCDFWriter(
            self,
            ignore_ghostcells=self.ignore_ghostcells,
            staggered_grid=True,
            offset_x=self.offset_x, offset_y=self.offset_y)
def __init__(self,
             gpu_ctx,
             eta0, Hi, hu0, hv0,
             nx, ny,
             dx, dy, dt,
             g, f=0.0, r=0.0,
             t=0.0,
             theta=1.3, use_rk2=True, coriolis_beta=0.0,
             y_zero_reference_cell = 0,
             wind_stress=WindStress.WindStress(),
             boundary_conditions=Common.BoundaryConditions(),
             write_netcdf=False,
             ignore_ghostcells=False,
             offset_x=0, offset_y=0,
             block_width=32, block_height=16):
    """
    Set up the KP07 simulator on the GPU.

    gpu_ctx: GPU context used to compile and fetch the CUDA kernels
    eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
    Hi: Depth from equilibrium defined on cell corners, (nx+5)*(ny+5) corners
    hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
    hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
    nx: Number of cells along x-axis
    ny: Number of cells along y-axis
    dx: Grid cell spacing along x-axis (20 000 m)
    dy: Grid cell spacing along y-axis (20 000 m)
    dt: Size of each timestep (90 s)
    g: Gravitational acceleration (9.81 m/s^2)
    f: Coriolis parameter (1.2e-4 s^1), effectively as f = f + beta*y
    r: Bottom friction coefficient (2.4e-3 m/s)
    t: Start simulation at time t
    theta: MINMOD theta used the reconstructions of the derivatives in the numerical scheme
    use_rk2: Boolean if to use 2nd order Runge-Kutta (false -> 1st order forward Euler)
    coriolis_beta: Coriolis linear factor -> f = f + beta*(y-y_0)
    y_zero_reference_cell: The cell representing y_0 in the above, defined as the lower face of the cell.
    wind_stress: Wind stress parameters
    boundary_conditions: Boundary condition object
    write_netcdf: Write the results after each superstep to a netCDF file
    ignore_ghostcells, offset_x, offset_y: forwarded to the parent
        constructor and to the netCDF writer
    block_width, block_height: CUDA block size used when compiling the kernel
    """
    # After changing from (h, B) to (eta, H), several of the simulator
    # settings used are wrong. This check will help detect that: it counts
    # the cells where eta0 lies above Hi.
    num_cells_above_Hi = np.sum(eta0 - Hi[:-1, :-1] > 0)
    if num_cells_above_Hi > nx:
        assert(False), "It seems you are using water depth/elevation h and bottom topography B, while you should use water level eta and equillibrium depth H."

    ghost_cells_x = ghost_cells_y = 2
    # Offset the reference cell by two (the number of ghost cells set above)
    y_zero_reference_cell = 2.0 + y_zero_reference_cell

    # Boundary conditions
    self.boundary_conditions = boundary_conditions

    # Extend the computational domain if the boundary conditions
    # require it
    if boundary_conditions.isSponge():
        # spongeCells is indexed [north, east, south, west]
        sponge = boundary_conditions.spongeCells
        nx = nx + sponge[1] + sponge[3] - 2*ghost_cells_x
        ny = ny + sponge[0] + sponge[2] - 2*ghost_cells_y
        y_zero_reference_cell = sponge[2] + y_zero_reference_cell

    self.use_rk2 = use_rk2
    # True -> order 2, False -> order 1
    rk_order = np.int32(use_rk2 + 1)

    # The parent constructor takes A positionally; None is passed here.
    super(KP07, self).__init__(gpu_ctx,
                               nx, ny,
                               ghost_cells_x,
                               ghost_cells_y,
                               dx, dy, dt,
                               g, f, r,
                               None,  # A
                               t,
                               theta, rk_order,
                               coriolis_beta,
                               y_zero_reference_cell,
                               wind_stress,
                               write_netcdf,
                               ignore_ghostcells,
                               offset_x, offset_y,
                               block_width, block_height)

    # Index range for interior domain (north, east, south, west)
    # so that interior domain of eta is
    # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0],
    #     self.interior_domain_indices[3]:self.interior_domain_indices[1]]
    self.interior_domain_indices = np.array([-2, -2, 2, 2])
    self._set_interior_domain_from_sponge_cells()

    # Get kernels
    block_defines = {'block_width': block_width, 'block_height': block_height}
    self.kp07_kernel = gpu_ctx.get_kernel("KP07_kernel.cu", defines=block_defines)

    # Get CUDA functions and define data types for prepared_{async_}call()
    self.swe_2D = self.kp07_kernel.get_function("swe_2D")
    self.swe_2D.prepare("iifffffffffiPiPiPiPiPiPiPiPiiiiif")
    self.update_wind_stress(self.kp07_kernel, self.swe_2D)

    # Create data by uploading to device
    self.gpu_data = Common.SWEDataArakawaA(self.gpu_stream, nx, ny,
                                           ghost_cells_x, ghost_cells_y,
                                           eta0, hu0, hv0)

    # Bathymetry
    self.bathymetry = Common.Bathymetry(self.gpu_ctx, self.gpu_stream,
                                        nx, ny,
                                        ghost_cells_x, ghost_cells_y,
                                        Hi, boundary_conditions)

    self.bc_kernel = Common.BoundaryConditionsArakawaA(gpu_ctx,
                                                       self.nx,
                                                       self.ny,
                                                       ghost_cells_x,
                                                       ghost_cells_y,
                                                       self.boundary_conditions)

    if self.write_netcdf:
        self.sim_writer = SimWriter.SimNetCDFWriter(
            self,
            ignore_ghostcells=self.ignore_ghostcells,
            offset_x=self.offset_x, offset_y=self.offset_y)
def __init__(self,
             gpu_ctx,
             eta0, hu0, hv0, Hi,
             nx, ny,
             dx, dy, dt,
             g, f, r,
             t=0.0,
             theta=1.3, rk_order=2,
             coriolis_beta=0.0,
             y_zero_reference_cell = 0,
             max_wind_direction_perturbation = 0,
             wind_stress=WindStress.WindStress(),
             boundary_conditions=Common.BoundaryConditions(),
             small_scale_perturbation=False,
             small_scale_perturbation_amplitude=None,
             h0AsWaterElevation=False,
             reportGeostrophicEquilibrium=False,
             write_netcdf=False,
             ignore_ghostcells=False,
             offset_x=0, offset_y=0,
             block_width=32, block_height=4):
    """
    Set up the CDKLM16 simulator on the GPU.

    gpu_ctx: GPU context used to compile and fetch the CUDA kernels
    eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
    hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
    hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
    Hi: Depth from equilibrium defined on cell corners, (nx+5)*(ny+5) corners
    nx: Number of cells along x-axis
    ny: Number of cells along y-axis
    dx: Grid cell spacing along x-axis (20 000 m)
    dy: Grid cell spacing along y-axis (20 000 m)
    dt: Size of each timestep (90 s)
    g: Gravitational acceleration (9.81 m/s^2)
    f: Coriolis parameter (1.2e-4 s^1), effectively as f = f + beta*y
    r: Bottom friction coefficient (2.4e-3 m/s)
    t: Start simulation at time t
    theta: MINMOD theta used the reconstructions of the derivatives in the numerical scheme
    rk_order: Order of Runge Kutta method {1,2*,3}
    coriolis_beta: Coriolis linear factor -> f = f + beta*(y-y_0)
    y_zero_reference_cell: The cell representing y_0 in the above, defined as the lower face of the cell.
    max_wind_direction_perturbation: Large-scale model error emulation by per-time-step perturbation of wind direction by +/- max_wind_direction_perturbation (degrees)
    wind_stress: Wind stress parameters
    boundary_conditions: Boundary condition object
    small_scale_perturbation: If true, an OceanStateNoise object is created from this simulator
    small_scale_perturbation_amplitude: Passed as soar_q0 to OceanStateNoise when not None
    h0AsWaterElevation: True if h0 is described by the surface elevation, and false if h0 is described by water depth
    reportGeostrophicEquilibrium: Calculate the Geostrophic Equilibrium variables for each superstep
    write_netcdf: Write the results after each superstep to a netCDF file
    ignore_ghostcells, offset_x, offset_y: forwarded to the parent
        constructor and to the netCDF writer
    block_width, block_height: CUDA block size used when compiling the kernel

    Raises AssertionError if the input looks like (h, B) rather than
    (eta, H), if rk_order is not 1, 2 or 3, or if rk_order is 3 and r != 0.
    """
    # After changing from (h, B) to (eta, H), several of the simulator
    # settings used are wrong. This check will help detect that.
    if np.sum(eta0 - Hi[:-1, :-1] > 0) > nx:
        assert(False), "It seems you are using water depth/elevation h and bottom topography B, while you should use water level eta and equillibrium depth H."

    # Both bounds must hold. (The previous "or" made this check always pass.)
    assert 0 < rk_order < 4, "Only 1st, 2nd and 3rd order Runge Kutta supported"

    if rk_order == 3:
        assert(r == 0.0), "3rd order Runge Kutta supported only without friction"

    # Sort out internally represented ghost_cells in the presence of given
    # boundary conditions
    ghost_cells_x = 2
    ghost_cells_y = 2
    y_zero_reference_cell = 2 + y_zero_reference_cell

    # Boundary conditions
    self.boundary_conditions = boundary_conditions
    if boundary_conditions.isSponge():
        # spongeCells is indexed [north, east, south, west]
        sponge = boundary_conditions.spongeCells
        nx = nx + sponge[1] + sponge[3] - 2*ghost_cells_x
        ny = ny + sponge[0] + sponge[2] - 2*ghost_cells_y
        y_zero_reference_cell = sponge[2] + y_zero_reference_cell

    # The parent constructor takes A positionally; None is passed here.
    A = None
    self.max_wind_direction_perturbation = max_wind_direction_perturbation
    super(CDKLM16, self).__init__(gpu_ctx,
                                  nx, ny,
                                  ghost_cells_x,
                                  ghost_cells_y,
                                  dx, dy, dt,
                                  g, f, r, A,
                                  t,
                                  theta, rk_order,
                                  coriolis_beta,
                                  y_zero_reference_cell,
                                  wind_stress,
                                  write_netcdf,
                                  ignore_ghostcells,
                                  offset_x, offset_y,
                                  block_width, block_height)

    # Index range for interior domain (north, east, south, west)
    # so that interior domain of eta is
    # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0],
    #     self.interior_domain_indices[3]:self.interior_domain_indices[1]]
    self.interior_domain_indices = np.array([-2, -2, 2, 2])
    self._set_interior_domain_from_sponge_cells()

    # Get kernels
    self.kernel = gpu_ctx.get_kernel("CDKLM16_kernel.cu",
                                     defines={'block_width': block_width,
                                              'block_height': block_height})

    # Get CUDA functions and define data types for prepared_{async_}call()
    self.swe_2D = self.kernel.get_function("swe_2D")
    self.swe_2D.prepare("iifffffffffiiPiPiPiPiPiPiPiPifiiiiiPiPiPi")
    self.update_wind_stress(self.kernel, self.swe_2D)

    # Create data by uploading to device
    self.gpu_data = Common.SWEDataArakawaA(self.gpu_stream, nx, ny,
                                           ghost_cells_x, ghost_cells_y,
                                           eta0, hu0, hv0)

    # Allocating memory for geostrophical equilibrium variables
    self.reportGeostrophicEquilibrium = np.int32(reportGeostrophicEquilibrium)
    dummy_zero_array = np.zeros((ny+2*ghost_cells_y, nx+2*ghost_cells_x),
                                dtype=np.float32, order='C')
    self.geoEq_uxpvy = Common.CUDAArray2D(self.gpu_stream, nx, ny,
                                          ghost_cells_x, ghost_cells_y,
                                          dummy_zero_array)
    self.geoEq_Kx = Common.CUDAArray2D(self.gpu_stream, nx, ny,
                                       ghost_cells_x, ghost_cells_y,
                                       dummy_zero_array)
    self.geoEq_Ly = Common.CUDAArray2D(self.gpu_stream, nx, ny,
                                       ghost_cells_x, ghost_cells_y,
                                       dummy_zero_array)

    # Bathymetry
    self.bathymetry = Common.Bathymetry(gpu_ctx, self.gpu_stream, nx, ny,
                                        ghost_cells_x, ghost_cells_y,
                                        Hi, boundary_conditions)
    self.h0AsWaterElevation = h0AsWaterElevation
    if self.h0AsWaterElevation:
        self.bathymetry.waterElevationToDepth(self.gpu_data.h0)

    self.constant_equilibrium_depth = np.max(Hi)

    self.bc_kernel = Common.BoundaryConditionsArakawaA(gpu_ctx,
                                                       self.nx,
                                                       self.ny,
                                                       ghost_cells_x,
                                                       ghost_cells_y,
                                                       self.boundary_conditions)

    # Small scale perturbation:
    self.small_scale_perturbation = small_scale_perturbation
    self.small_scale_model_error = None
    if small_scale_perturbation:
        if small_scale_perturbation_amplitude is None:
            self.small_scale_model_error = OceanStateNoise.OceanStateNoise.fromsim(self)
        else:
            self.small_scale_model_error = OceanStateNoise.OceanStateNoise.fromsim(
                self, soar_q0=small_scale_perturbation_amplitude)

    if self.write_netcdf:
        self.sim_writer = SimWriter.SimNetCDFWriter(
            self,
            ignore_ghostcells=self.ignore_ghostcells,
            offset_x=self.offset_x, offset_y=self.offset_y)
def step(self, t_end=0.0, apply_stochastic_term=True, write_now=True, update_dt=False):
    """
    Advance the simulation by t_end units of simulation time.

    The loop takes timesteps of at most self.dt until the time accumulated
    in this call reaches t_end; the last timestep is shortened so that
    it ends exactly at t_end. With the default t_end=0.0 no timestep is
    taken.

    t_end: Amount of simulation time to advance, relative to the current
        self.t (not an absolute end time).
    apply_stochastic_term: Boolean value for whether the stochastic
        perturbation (if any) should be applied.
    write_now: If true and self.write_netcdf is set, the state is written
        through self.sim_writer once after the loop.
    update_dt: If true, self.updateDt() is called before every timestep.
        It is also called whenever self.dt <= 0.

    Returns the simulation time self.t after stepping.
    """
    # Boundary conditions are applied to the initial state only when the
    # simulation starts from time zero.
    if self.t == 0:
        self.bc_kernel.update_bc_values(self.gpu_stream, self.t)
        self.bc_kernel.boundaryCondition(self.gpu_stream, \
                self.gpu_data.h0, self.gpu_data.hu0, self.gpu_data.hv0)

    # Time advanced within this call
    t_now = 0.0

    while (t_now < t_end):
        # Get new random wind direction (emulating large-scale model error)
        # NOTE(review): the branch requires wind_stress.type() == 1 but reads
        # rho_air / wind_speed / wind_direction and builds a
        # GenericUniformWindStress - confirm that type 1 is the intended type.
        if(self.max_wind_direction_perturbation > 0.0 and self.wind_stress.type() == 1):
            # max perturbation +/- max_wind_direction_perturbation deg within original wind direction (at t=0)
            perturbation = 2.0*(np.random.rand()-0.5) * self.max_wind_direction_perturbation;
            new_wind_stress = WindStress.GenericUniformWindStress( \
                rho_air=self.wind_stress.rho_air, \
                wind_speed=self.wind_stress.wind_speed, \
                wind_direction=self.wind_stress.wind_direction + perturbation)
            # Upload new wind stress params to device
            cuda.memcpy_htod_async(int(self.wind_stress_dev), new_wind_stress.tostruct(), stream=self.gpu_stream)

        # Calculate dt if using automatic dt
        if (self.dt <= 0 or update_dt):
            self.updateDt()
        # Never step past t_end
        local_dt = np.float32(min(self.dt, np.float32(t_end - t_now)))

        wind_stress_t = np.float32(self.update_wind_stress(self.kernel, self.cdklm_swe_2D))
        self.bc_kernel.update_bc_values(self.gpu_stream, self.t)

        # The last argument of callKernel is the Runge-Kutta stage index.
        # 2nd order Runge Kutta
        if (self.rk_order == 2):

            # Stage 0: (h0, hu0, hv0) -> (h1, hu1, hv1)
            self.callKernel(self.gpu_data.h0, self.gpu_data.hu0, self.gpu_data.hv0, \
                            self.gpu_data.h1, self.gpu_data.hu1, self.gpu_data.hv1, \
                            local_dt, wind_stress_t, 0)

            self.bc_kernel.boundaryCondition(self.gpu_stream, \
                    self.gpu_data.h1, self.gpu_data.hu1, self.gpu_data.hv1)

            # Stage 1: (h1, hu1, hv1) -> (h0, hu0, hv0)
            self.callKernel(self.gpu_data.h1, self.gpu_data.hu1, self.gpu_data.hv1, \
                            self.gpu_data.h0, self.gpu_data.hu0, self.gpu_data.hv0, \
                            local_dt, wind_stress_t, 1)

            # Final boundary conditions are applied after the perturbation below

        elif (self.rk_order == 1):
            # Forward Euler: the result is written to the "1" buffers, which
            # are then swapped with the "0" buffers.
            self.callKernel(self.gpu_data.h0, self.gpu_data.hu0, self.gpu_data.hv0, \
                            self.gpu_data.h1, self.gpu_data.hu1, self.gpu_data.hv1, \
                            local_dt, wind_stress_t, 0)

            self.gpu_data.swap()

            # Boundary conditions are applied after the perturbation below

        # 3rd order RK method:
        elif (self.rk_order == 3):

            self.callKernel(self.gpu_data.h0, self.gpu_data.hu0, self.gpu_data.hv0, \
                            self.gpu_data.h1, self.gpu_data.hu1, self.gpu_data.hv1, \
                            local_dt, wind_stress_t, 0)

            self.bc_kernel.boundaryCondition(self.gpu_stream, \
                    self.gpu_data.h1, self.gpu_data.hu1, self.gpu_data.hv1)

            self.callKernel(self.gpu_data.h1, self.gpu_data.hu1, self.gpu_data.hv1, \
                            self.gpu_data.h0, self.gpu_data.hu0, self.gpu_data.hv0, \
                            local_dt, wind_stress_t, 1)

            # NOTE(review): stage 1 wrote to the "0" buffers, but the boundary
            # conditions here are applied to the "1" buffers, and stage 2 reads
            # the "1" buffers again - confirm against the kernel's RK3 staging.
            self.bc_kernel.boundaryCondition(self.gpu_stream, \
                    self.gpu_data.h1, self.gpu_data.hu1, self.gpu_data.hv1)

            self.callKernel(self.gpu_data.h1, self.gpu_data.hu1, self.gpu_data.hv1, \
                            self.gpu_data.h0, self.gpu_data.hu0, self.gpu_data.hv0, \
                            local_dt, wind_stress_t, 2)

            # Final boundary conditions are applied after the perturbation below

        # Perturb ocean state with model error
        if self.small_scale_perturbation and apply_stochastic_term:
            self.small_scale_model_error.perturbSim(self)

        # Apply boundary conditions
        self.bc_kernel.boundaryCondition(self.gpu_stream, \
                self.gpu_data.h0, self.gpu_data.hu0, self.gpu_data.hv0)

        # Evolve drifters
        if self.hasDrifters:
            self.drifters.drift(self.gpu_data.h0, self.gpu_data.hu0, \
                                self.gpu_data.hv0, \
                                np.float32(self.constant_equilibrium_depth), \
                                self.nx, self.ny, self.dx, self.dy, \
                                local_dt, \
                                np.int32(2), np.int32(2))

        # Time is accumulated in double precision although each timestep
        # is a float32.
        self.t += np.float64(local_dt)
        t_now += np.float64(local_dt)
        self.num_iterations += 1

    if self.write_netcdf and write_now:
        self.sim_writer.writeTimestep(self)

    return self.t
def __init__(self,
             gpu_ctx,
             eta0, hu0, hv0, H,
             nx, ny,
             dx, dy, dt,
             g, f, r,
             angle=np.array([[0]], dtype=np.float32),
             t=0.0,
             theta=1.3, rk_order=2,
             coriolis_beta=0.0,
             max_wind_direction_perturbation = 0,
             wind_stress=WindStress.WindStress(),
             boundary_conditions=Common.BoundaryConditions(),
             boundary_conditions_data=Common.BoundaryConditionsData(),
             small_scale_perturbation=False,
             small_scale_perturbation_amplitude=None,
             small_scale_perturbation_interpolation_factor = 1,
             model_time_step=None,
             reportGeostrophicEquilibrium=False,
             use_lcg=False,
             write_netcdf=False,
             comm=None,
             netcdf_filename=None,
             ignore_ghostcells=False,
             courant_number=0.8,
             offset_x=0, offset_y=0,
             flux_slope_eps = 1.0e-1,
             desingularization_eps = 1.0e-1,
             depth_cutoff = 1.0e-5,
             block_width=32, block_height=8, num_threads_dt=256,
             block_width_model_error=16, block_height_model_error=16):
    """
    Set up the CDKLM16 simulator on the GPU.

    gpu_ctx: GPU context used to compile and fetch the CUDA kernels
    eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
    hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
    hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
    H: Depth from equilibrium defined on cell corners, (nx+5)*(ny+5) corners
    nx: Number of cells along x-axis
    ny: Number of cells along y-axis
    dx: Grid cell spacing along x-axis (20 000 m)
    dy: Grid cell spacing along y-axis (20 000 m)
    dt: Size of each timestep (90 s)
    g: Gravitational acceleration (9.81 m/s^2)
    f: Coriolis parameter (1.2e-4 s^1), effectively as f = f + beta*y
    r: Bottom friction coefficient (2.4e-3 m/s)
    angle: Angle of rotation from North to y-axis
    t: Start simulation at time t
    theta: MINMOD theta used the reconstructions of the derivatives in the numerical scheme
    rk_order: Order of Runge Kutta method {1,2*,3}
    coriolis_beta: Coriolis linear factor -> f = f + beta*(y-y_0)
    max_wind_direction_perturbation: Large-scale model error emulation by per-time-step perturbation of wind direction by +/- max_wind_direction_perturbation (degrees)
    wind_stress: Wind stress parameters
    boundary_conditions: Boundary condition object
    boundary_conditions_data: Passed on to Common.BoundaryConditionsArakawaA
    small_scale_perturbation: Boolean value for applying a stochastic model error
    small_scale_perturbation_amplitude: Amplitude (q0 coefficient) for model error
    small_scale_perturbation_interpolation_factor: Width factor for correlation in model error
    model_time_step: The size of a data assimilation model step (default same as dt)
    reportGeostrophicEquilibrium: Calculate the Geostrophic Equilibrium variables for each superstep
    use_lcg: Use LCG as the random number generator. Default is False, which means using curand.
    write_netcdf: Write the results after each superstep to a netCDF file
    comm: MPI communicator
    netcdf_filename: Use this filename. (If not defined, a filename will be generated by SimWriter.)
    ignore_ghostcells, offset_x, offset_y: forwarded to the parent
        constructor and to the netCDF writer
    courant_number: Stored on the simulator as self.courant_number
    flux_slope_eps: Used for setting zero flux for symmetric Riemann fan
    desingularization_eps: Used for desingularizing hu/h
    depth_cutoff: Used for defining dry cells
    block_width, block_height: CUDA block size used when compiling the kernels
    num_threads_dt: NUM_THREADS define for the max_dt.cu kernels
    block_width_model_error, block_height_model_error: Block size passed
        to OceanStateNoise

    Raises AssertionError if rk_order is not 1, 2 or 3, or if rk_order
    is 3 and r != 0.
    """
    self.logger = logging.getLogger(__name__)

    # Both bounds must hold. (The previous "or" made this check always pass.)
    assert 0 < rk_order < 4, "Only 1st, 2nd and 3rd order Runge Kutta supported"

    if rk_order == 3:
        assert(r == 0.0), "3rd order Runge Kutta supported only without friction"

    # Sort out internally represented ghost_cells in the presence of given
    # boundary conditions
    ghost_cells_x = 2
    ghost_cells_y = 2

    # Coriolis at "first" cell
    x_zero_reference_cell = ghost_cells_x
    y_zero_reference_cell = ghost_cells_y

    # Boundary conditions
    self.boundary_conditions = boundary_conditions
    if boundary_conditions.isSponge():
        # spongeCells is indexed [north, east, south, west]
        sponge = boundary_conditions.spongeCells
        nx = nx + sponge[1] + sponge[3] - 2*ghost_cells_x
        ny = ny + sponge[0] + sponge[2] - 2*ghost_cells_y
        x_zero_reference_cell += sponge[3]
        y_zero_reference_cell += sponge[2]

    # Compensate f for reference cell (first cell in internal of domain):
    # the offset is folded into f, and a reference cell of zero is passed
    # to the parent constructor.
    north = np.array([np.sin(angle[0, 0]), np.cos(angle[0, 0])])
    f = f - coriolis_beta * (x_zero_reference_cell*dx*north[0]
                             + y_zero_reference_cell*dy*north[1])
    y_zero_reference_cell = 0

    # The parent constructor takes A positionally; None is passed here.
    A = None
    self.max_wind_direction_perturbation = max_wind_direction_perturbation
    super(CDKLM16, self).__init__(gpu_ctx,
                                  nx, ny,
                                  ghost_cells_x,
                                  ghost_cells_y,
                                  dx, dy, dt,
                                  g, f, r, A,
                                  t,
                                  theta, rk_order,
                                  coriolis_beta,
                                  y_zero_reference_cell,
                                  wind_stress,
                                  write_netcdf,
                                  ignore_ghostcells,
                                  offset_x, offset_y,
                                  comm,
                                  block_width, block_height)

    # Index range for interior domain (north, east, south, west)
    # so that interior domain of eta is
    # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0],
    #     self.interior_domain_indices[3]:self.interior_domain_indices[1]]
    self.interior_domain_indices = np.array([-2, -2, 2, 2])
    self._set_interior_domain_from_sponge_cells()

    # The epsilons are passed to the kernel as single-precision literals
    defines = {'block_width': block_width, 'block_height': block_height,
               'KPSIMULATOR_DESING_EPS': str(desingularization_eps)+'f',
               'KPSIMULATOR_FLUX_SLOPE_EPS': str(flux_slope_eps)+'f',
               'KPSIMULATOR_DEPTH_CUTOFF': str(depth_cutoff)+'f'}

    # Get kernels
    self.kernel = gpu_ctx.get_kernel("CDKLM16_kernel.cu",
            defines=defines,
            compile_args={                        # default, fast_math, optimal
                'options': ["--ftz=true",         # false,   true,      true
                            "--prec-div=false",   # true,    false,     false
                            "--prec-sqrt=false",  # true,    false,     false
                            "--fmad=false"]       # true,    true,      false
            },
            jit_compile_args={})

    # Get CUDA functions and define data types for prepared_{async_}call()
    self.cdklm_swe_2D = self.kernel.get_function("cdklm_swe_2D")
    self.cdklm_swe_2D.prepare("iiffffffffiiPiPiPiPiPiPiPiPiffi")
    self.update_wind_stress(self.kernel, self.cdklm_swe_2D)

    # CUDA functions for finding max time step size:
    self.num_threads_dt = num_threads_dt
    self.num_blocks_dt = np.int32(self.global_size[0]*self.global_size[1])
    self.update_dt_kernels = gpu_ctx.get_kernel("max_dt.cu",
            defines={'block_width': block_width,
                     'block_height': block_height,
                     'NUM_THREADS': self.num_threads_dt})
    self.per_block_max_dt_kernel = self.update_dt_kernels.get_function("per_block_max_dt")
    self.per_block_max_dt_kernel.prepare("iifffPiPiPiPifPi")
    self.max_dt_reduction_kernel = self.update_dt_kernels.get_function("max_dt_reduction")
    self.max_dt_reduction_kernel.prepare("iPP")

    # Bathymetry
    self.bathymetry = Common.Bathymetry(gpu_ctx, self.gpu_stream, nx, ny,
                                        ghost_cells_x, ghost_cells_y,
                                        H, boundary_conditions)

    # Adjust eta for possible dry states: eta is clamped from below by -Hm
    Hm = self.downloadBathymetry()[1]
    eta0 = np.maximum(eta0, -Hm)

    # Create data by uploading to device
    self.gpu_data = Common.SWEDataArakawaA(self.gpu_stream, nx, ny,
                                           ghost_cells_x, ghost_cells_y,
                                           eta0, hu0, hv0)

    # Allocate memory for calculating maximum timestep
    host_dt = np.zeros((self.global_size[1], self.global_size[0]), dtype=np.float32)
    self.device_dt = Common.CUDAArray2D(self.gpu_stream,
                                        self.global_size[0], self.global_size[1],
                                        0, 0, host_dt)
    host_max_dt_buffer = np.zeros((1, 1), dtype=np.float32)
    self.max_dt_buffer = Common.CUDAArray2D(self.gpu_stream, 1, 1, 0, 0,
                                            host_max_dt_buffer)
    self.courant_number = courant_number

    # Allocating memory for geostrophical equilibrium variables
    # (only when reporting is requested)
    self.reportGeostrophicEquilibrium = np.int32(reportGeostrophicEquilibrium)
    self.geoEq_uxpvy = None
    self.geoEq_Kx = None
    self.geoEq_Ly = None
    if self.reportGeostrophicEquilibrium:
        dummy_zero_array = np.zeros((ny+2*ghost_cells_y, nx+2*ghost_cells_x),
                                    dtype=np.float32, order='C')
        self.geoEq_uxpvy = Common.CUDAArray2D(self.gpu_stream, nx, ny,
                                              ghost_cells_x, ghost_cells_y,
                                              dummy_zero_array)
        self.geoEq_Kx = Common.CUDAArray2D(self.gpu_stream, nx, ny,
                                           ghost_cells_x, ghost_cells_y,
                                           dummy_zero_array)
        self.geoEq_Ly = Common.CUDAArray2D(self.gpu_stream, nx, ny,
                                           ghost_cells_x, ghost_cells_y,
                                           dummy_zero_array)

    self.constant_equilibrium_depth = np.max(H)

    self.bc_kernel = Common.BoundaryConditionsArakawaA(gpu_ctx,
                                                       self.nx,
                                                       self.ny,
                                                       ghost_cells_x,
                                                       ghost_cells_y,
                                                       self.boundary_conditions,
                                                       boundary_conditions_data)

    # Small scale perturbation:
    self.small_scale_perturbation = small_scale_perturbation
    self.small_scale_model_error = None
    self.small_scale_perturbation_interpolation_factor = small_scale_perturbation_interpolation_factor
    if small_scale_perturbation:
        noise_kwargs = {
            'interpolation_factor': small_scale_perturbation_interpolation_factor,
            'use_lcg': use_lcg,
            'block_width': block_width_model_error,
            'block_height': block_height_model_error,
        }
        # Only override the amplitude when the caller supplied one
        if small_scale_perturbation_amplitude is not None:
            noise_kwargs['soar_q0'] = small_scale_perturbation_amplitude
        self.small_scale_model_error = OceanStateNoise.OceanStateNoise.fromsim(
            self, **noise_kwargs)

    # Data assimilation model step size
    self.model_time_step = model_time_step
    if model_time_step is None:
        self.model_time_step = self.dt
    self.total_time_steps = 0

    if self.write_netcdf:
        self.sim_writer = SimWriter.SimNetCDFWriter(
            self,
            filename=netcdf_filename,
            ignore_ghostcells=self.ignore_ghostcells,
            offset_x=self.offset_x, offset_y=self.offset_y)

    # Upload data to GPU and bind to texture reference
    self.angle_texref = self.kernel.get_texref("angle_tex")
    self.angle_texref.set_array(
        cuda.np_to_array(np.ascontiguousarray(angle, dtype=np.float32), order="C"))

    # Set texture parameters
    self.angle_texref.set_filter_mode(cuda.filter_mode.LINEAR)  # bilinear interpolation
    self.angle_texref.set_address_mode(0, cuda.address_mode.CLAMP)  # no indexing outside domain
    self.angle_texref.set_address_mode(1, cuda.address_mode.CLAMP)
    self.angle_texref.set_flags(cuda.TRSF_NORMALIZED_COORDINATES)  # Use [0, 1] indexing
def __init__(self,
             gpu_ctx,
             H, eta0, hu0, hv0,
             nx, ny,
             dx, dy, dt,
             g, f, r,
             t=0.0,
             coriolis_beta=0.0,
             y_zero_reference_cell=0,
             wind_stress=WindStress.WindStress(),
             boundary_conditions=Common.BoundaryConditions(),
             write_netcdf=False,
             ignore_ghostcells=False,
             offset_x=0, offset_y=0,
             block_width=16, block_height=16):
    """
    Set up the FBL simulator: base-class state, CUDA kernels and device data.

    gpu_ctx: GPU context object, used here to fetch the CUDA kernels
    H: Water depth incl ghost cells, (nx+2)*(ny+2) cells
    eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
    hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
    hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
    nx: Number of cells along x-axis
    ny: Number of cells along y-axis
    dx: Grid cell spacing along x-axis (20 000 m)
    dy: Grid cell spacing along y-axis (20 000 m)
    dt: Size of each timestep (90 s)
    g: Gravitational acceleration (9.81 m/s^2)
    f: Coriolis parameter (1.2e-4 s^-1), effectively as f = f + beta*y
    r: Bottom friction coefficient (2.4e-3 m/s)
    t: Forwarded to the base class (presumably the simulation start time)
    coriolis_beta: Coriolis linear factor -> f = f + beta*y
    y_zero_reference_cell: The cell representing y_0 in the above, defined
        as the lower face of the cell. Shifted by the number of southern
        sponge cells when the boundary conditions are sponge.
    wind_stress: Wind stress parameters
    boundary_conditions: Boundary condition object
    write_netcdf: Write the results after each superstep to a netCDF file
    ignore_ghostcells: Forwarded to the base class; later handed to the
        netCDF writer
    offset_x, offset_y: Forwarded to the base class; later handed to the
        netCDF writer
    block_width, block_height: Passed as compile-time defines to the
        CUDA kernels and forwarded to the base class
    """
    # This scheme is set up without regular ghost cells.
    ghost_cells_x = 0
    ghost_cells_y = 0

    # Asymmetric ghost cells, ordered [N, E, S, W]
    self.asym_ghost_cells = [0, 0, 0, 0]

    # Index range for interior domain (north, east, south, west)
    # so that interior domain of eta is
    # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0], \
    #     self.interior_domain_indices[3]:self.interior_domain_indices[1] ]
    self.interior_domain_indices = np.array([None, None, 0, 0])

    self.boundary_conditions = boundary_conditions
    bc = self.boundary_conditions

    # A periodic boundary (value 2) adds one asymmetric ghost cell
    # on the north and/or east side.
    if bc.north == 2 or bc.south == 2:
        self.asym_ghost_cells[0] = 1
        self.interior_domain_indices[0] = -1
    if bc.east == 2 or bc.west == 2:
        self.asym_ghost_cells[1] = 1
        self.interior_domain_indices[1] = -1

    # Sponge cells enlarge the computational domain and move the
    # y_0 reference cell accordingly.
    if boundary_conditions.isSponge():
        sponge = boundary_conditions.spongeCells
        nx = nx + sponge[1] + sponge[3]
        ny = ny + sponge[0] + sponge[2]
        y_zero_reference_cell = y_zero_reference_cell + sponge[2]

    super(FBL, self).__init__(gpu_ctx,
                              nx, ny,
                              ghost_cells_x,
                              ghost_cells_y,
                              dx, dy, dt,
                              g, f, r,
                              None,  # A
                              t,
                              None,  # theta
                              None,  # rk_order
                              coriolis_beta,
                              y_zero_reference_cell,
                              wind_stress,
                              write_netcdf,
                              ignore_ghostcells,
                              offset_x, offset_y,
                              block_width, block_height)
    self._set_interior_domain_from_sponge_cells()

    # Compile the three kernels, each with its own copy of the defines.
    block_defines = {'block_width': block_width, 'block_height': block_height}
    self.u_kernel = gpu_ctx.get_kernel("FBL_U_kernel.cu", defines=dict(block_defines))
    self.v_kernel = gpu_ctx.get_kernel("FBL_V_kernel.cu", defines=dict(block_defines))
    self.eta_kernel = gpu_ctx.get_kernel("FBL_eta_kernel.cu", defines=dict(block_defines))

    # Look up the CUDA functions
    self.computeUKernel = self.u_kernel.get_function("computeUKernel")
    self.computeVKernel = self.v_kernel.get_function("computeVKernel")
    self.computeEtaKernel = self.eta_kernel.get_function("computeEtaKernel")

    # Declare the argument signatures for prepared kernel launches
    kernel_signatures = (
        (self.computeUKernel, "iiffffffffPiPiPiPif"),
        (self.computeVKernel, "iiffffffffPiPiPiPif"),
        (self.computeEtaKernel, "iiffffffffPiPiPiPi"),
    )
    for cuda_function, signature in kernel_signatures:
        cuda_function.prepare(signature)

    # Set up wind stress textures for the two momentum kernels
    wind_stress_targets = (
        (self.u_kernel, self.computeUKernel),
        (self.v_kernel, self.computeVKernel),
    )
    for kernel_module, cuda_function in wind_stress_targets:
        self.update_wind_stress(kernel_module, cuda_function)

    # Upload water depth and initial state to the device
    self.H = Common.CUDAArray2D(self.gpu_stream, nx, ny,
                                ghost_cells_x, ghost_cells_y,
                                H, self.asym_ghost_cells)
    self.gpu_data = Common.SWEDataArakawaC(self.gpu_stream, nx, ny,
                                           ghost_cells_x, ghost_cells_y,
                                           eta0, hu0, hv0,
                                           self.asym_ghost_cells)

    # Overwrite halo with asymmetric ghost cells
    self.nx_halo = np.int32(nx + self.asym_ghost_cells[1] + self.asym_ghost_cells[3])
    self.ny_halo = np.int32(ny + self.asym_ghost_cells[0] + self.asym_ghost_cells[2])

    self.bc_kernel = FBL_periodic_boundary(self.gpu_ctx,
                                           self.nx,
                                           self.ny,
                                           self.boundary_conditions,
                                           self.asym_ghost_cells)

    self.totalNumIterations = 0

    if self.write_netcdf:
        self.sim_writer = SimWriter.SimNetCDFWriter(self,
                                                    ignore_ghostcells=self.ignore_ghostcells,
                                                    staggered_grid=True,
                                                    offset_x=self.offset_x,
                                                    offset_y=self.offset_y)
def __init__(self,
             cl_ctx,
             h0, hu0, hv0,
             Bi,
             nx, ny,
             dx, dy, dt,
             g, f, r,
             theta=1.3, use_rk2=True,
             wind_stress=WindStress.NoWindStress(),
             boundary_conditions=Common.BoundaryConditions(),
             h0AsWaterElevation=True,
             block_width=16, block_height=16):
    """
    Set up the recursive CDKLM simulator on an OpenCL context.

    cl_ctx: OpenCL context; a command queue is created on it
    h0, hu0, hv0: Initial state, uploaded to the device as Arakawa A data
    Bi: Bathymetry data handed to Common.Bathymetry
    nx, ny: Number of cells along the x- and y-axis; adjusted by the sponge
        cells (minus the ghost cells) when the boundary conditions are sponge
    dx, dy: Grid cell spacing along the x- and y-axis (stored as float32)
    dt: Size of each timestep (stored as float32)
    g: Stored as float32 (presumably gravitational acceleration)
    f: Stored as float32 (presumably the Coriolis parameter)
    r: Stored as float32 (presumably the bottom friction coefficient)
    theta: Stored as float32 (presumably the minmod limiter parameter
        -- TODO confirm against the kernel)
    use_rk2: Stored as-is (presumably selects second-order Runge-Kutta
        time integration -- TODO confirm)
    wind_stress: Wind stress object, stored as-is
    boundary_conditions: Boundary condition object
    h0AsWaterElevation: If true, self.bathymetry.waterElevationToDepth is
        called on the uploaded h0 at the end of initialization
    block_width, block_height: OpenCL work-group size; also passed as
        compile-time defines to the kernel
    """
    print("Using RECURSIVE CDKLM scheme!")

    self.cl_ctx = cl_ctx

    # Create an OpenCL command queue
    self.cl_queue = cl.CommandQueue(self.cl_ctx)

    # Get kernels
    self.kernel = Common.get_kernel(self.cl_ctx,
                                    "recursiveCDKLM16_kernel.opencl",
                                    defines={'block_width': block_width,
                                             'block_height': block_height})

    # Boundary conditions.
    # boundaryType encodes which of north/east have boundary value 2:
    # 1 = neither, 2 = both, 3 = north only, 4 = east only.
    self.boundary_conditions = boundary_conditions
    self.boundaryType = np.int32(1)
    if boundary_conditions.north == 2 and boundary_conditions.east == 2:
        self.boundaryType = np.int32(2)
    elif boundary_conditions.north == 2:
        self.boundaryType = np.int32(3)
    elif boundary_conditions.east == 2:
        self.boundaryType = np.int32(4)

    # This scheme uses three ghost cells in each direction; define the
    # constant once and reuse it for both the locals and the attributes.
    ghost_cells_x = 3
    ghost_cells_y = 3
    self.ghost_cells_x = ghost_cells_x
    self.ghost_cells_y = ghost_cells_y

    # With sponge boundaries the sponge cells replace the ghost cells,
    # so the domain grows by the sponge widths minus the ghost cells.
    if boundary_conditions.isSponge():
        sponge = boundary_conditions.spongeCells
        nx = nx + sponge[1] + sponge[3] - 2*self.ghost_cells_x
        ny = ny + sponge[0] + sponge[2] - 2*self.ghost_cells_y

    # Create data by uploading to device
    self.cl_data = Common.SWEDataArakawaA(self.cl_ctx, nx, ny,
                                          ghost_cells_x, ghost_cells_y,
                                          h0, hu0, hv0)

    # Bathymetry
    self.bathymetry = Common.Bathymetry(self.cl_ctx, self.cl_queue, nx, ny,
                                        ghost_cells_x, ghost_cells_y,
                                        Bi, boundary_conditions)

    # Save input parameters.
    # Notice that we need to specify them in the correct data format for
    # the OpenCL kernel.
    self.nx = np.int32(nx)
    self.ny = np.int32(ny)
    self.dx = np.float32(dx)
    self.dy = np.float32(dy)
    self.dt = np.float32(dt)
    self.g = np.float32(g)
    self.f = np.float32(f)
    self.r = np.float32(r)
    self.theta = np.float32(theta)
    self.use_rk2 = use_rk2
    self.wind_stress = wind_stress
    self.h0AsWaterElevation = h0AsWaterElevation

    # Initialize time
    self.t = np.float32(0.0)

    # Compute kernel launch parameters: the global size is the domain
    # size rounded up to a whole number of work-groups.
    self.local_size = (block_width, block_height)
    self.global_size = (
        int(np.ceil(self.nx / float(self.local_size[0])) * self.local_size[0]),
        int(np.ceil(self.ny / float(self.local_size[1])) * self.local_size[1]),
    )

    self.bc_kernel = Common.BoundaryConditionsArakawaA(self.cl_ctx,
                                                       self.nx,
                                                       self.ny,
                                                       ghost_cells_x,
                                                       ghost_cells_y,
                                                       self.boundary_conditions)

    if self.h0AsWaterElevation:
        self.bathymetry.waterElevationToDepth(self.cl_data.h0)
def __init__(self, gpu_ctx, numDrifters,
             observation_variance=0.01, wind=WindStress.WindStress(), wind_drift_factor=0.0,
             boundaryConditions=Common.BoundaryConditions(),
             initialization_cov_drifters=None,
             domain_size_x=1.0, domain_size_y=1.0,
             gpu_stream=None,
             block_width=64):
    """
    Create a drifter collection whose positions live on the GPU.

    gpu_ctx: GPU context object, used to fetch the drift kernels
    numDrifters: Number of drifters, forwarded to the base class
    observation_variance: Forwarded to the base class
    wind: Wind object, stored as self.wind
    wind_drift_factor: Stored as float32; when non-zero the wind
        textures are initialized through self.update_wind
    boundaryConditions: Forwarded to the base class
    initialization_cov_drifters: Forwarded to
        self.uniformly_distribute_drifters
    domain_size_x, domain_size_y: Forwarded to the base class
    gpu_stream: CUDA stream to use; a new cuda.Stream() is created when
        None. NOTE(review): the drifters and the ocean simulator should
        presumably share one stream -- confirm at the call sites.
    block_width: CUDA block width for the drifter kernels (the block
        height is fixed to 1)
    """
    super(GPUDrifterCollection, self).__init__(numDrifters,
                                               observation_variance=observation_variance,
                                               boundaryConditions=boundaryConditions,
                                               domain_size_x=domain_size_x,
                                               domain_size_y=domain_size_y)

    # Define CUDA environment:
    self.gpu_ctx = gpu_ctx
    self.block_width = block_width
    self.block_height = 1

    self.wind = wind
    self.wind_drift_factor = np.float32(wind_drift_factor)

    # Fall back to a fresh stream when the caller did not supply one.
    self.gpu_stream = cuda.Stream() if gpu_stream is None else gpu_stream

    self.sensitivity = 1.0

    # Host and device buffers hold one row more than the number of
    # drifters, with two columns per row.
    num_rows = self.getNumDrifters() + 1
    self.driftersHost = np.zeros((num_rows, 2), dtype=np.float32, order='C')
    self.driftersDevice = Common.CUDAArray2D(self.gpu_stream,
                                             2, num_rows, 0, 0,
                                             self.driftersHost)

    self.drift_kernels = gpu_ctx.get_kernel("driftKernels.cu",
                                            defines={'block_width': self.block_width,
                                                     'block_height': self.block_height})

    # Get CUDA functions and define data types for prepared_{async_}call()
    self.passiveDrifterKernel = self.drift_kernels.get_function("passiveDrifterKernel")
    self.passiveDrifterKernel.prepare("iifffiiPiPiPiPiiiiPifff")
    self.enforceBoundaryConditionsKernel = self.drift_kernels.get_function("enforceBoundaryConditions")
    self.enforceBoundaryConditionsKernel.prepare("ffiiiPi")

    # Wind data is only set up when the drifters are affected by wind.
    if self.wind_drift_factor:
        self.wind_textures = {}
        self.wind_timestamps = {}
        self.update_wind(self.drift_kernels, self.passiveDrifterKernel, 0.0)

    # Launch configuration: one-dimensional grid of blocks
    self.local_size = (self.block_width, self.block_height, 1)
    num_blocks_x = int(np.ceil((num_rows + 1)/float(self.block_width)))
    self.global_size = (num_blocks_x, 1)

    # Initialize drifters:
    self.uniformly_distribute_drifters(
        initialization_cov_drifters=initialization_cov_drifters)