def setUp(self):
    """Create the CUDA context and reset every fixture attribute to its default."""
    self.gpu_ctx = Common.CUDAContext()

    # Grid dimensions and cell sizes
    self.nx, self.ny = 50, 70
    self.dx, self.dy = 200.0, 200.0

    # Scalar simulation parameters
    self.dt = 0.95
    self.g = 9.81
    self.f = 0.0
    self.r = 0.0
    self.A = 1

    self.waterHeight = 60

    # Fields are left unset here
    self.eta0 = self.u0 = self.v0 = None
    self.Hi = self.Bi = None

    # Ordered as north, east, south, west
    self.ghosts = [2, 2, 2, 2]
    self.validDomain = np.array([2, 2, 2, 2])

    # dataRange deliberately refers to the very same list object as refRange
    self.refRange = [-2, -2, 2, 2]
    self.dataRange = self.refRange

    self.boundaryConditions = None
    self.T = 50.0
    self.sim = None
def generateInitialConditions(sim_args, water_depth):
    """
    Build a perturbed initial state by running a throw-away CDKLM16 simulator.

    Must be called on MPI rank 0 (asserted). Starts from constant 'H' equal to
    water_depth and zero 'eta0', 'hu0', 'hv0', perturbs the simulator state,
    and returns a dict with keys 'H', 'eta0', 'hu0', 'hv0' holding the data
    downloaded from the simulator (including the non-interior cells).
    """
    from SWESimulators import CDKLM16, Common

    assert (MPI.COMM_WORLD.rank == 0)

    ny_total = sim_args['ny'] + 4
    nx_total = sim_args['nx'] + 4
    dataShape = (ny_total, nx_total)
    dataShapeHi = (ny_total + 1, nx_total + 1)

    sim_ic = {}
    sim_ic['H'] = np.ones(dataShapeHi, dtype=np.float32) * water_depth
    for key in ('eta0', 'hu0', 'hv0'):
        sim_ic[key] = np.zeros(dataShape, dtype=np.float32)

    # Inefficient but simple: spin up a full simulator only to perturb the state
    cuda_ctx = Common.CUDAContext()
    sim = CDKLM16.CDKLM16(cuda_ctx, **sim_args, **sim_ic)
    sim.perturbState(q0_scale=100)

    # Replace the flat start state with the randomly perturbed one
    eta0, hu0, hv0 = sim.download(interior_domain_only=False)
    sim_ic['eta0'] = eta0
    sim_ic['hu0'] = hu0
    sim_ic['hv0'] = hv0
    sim_ic['H'] = sim.downloadBathymetry()[0]

    # Drop the temporary simulator and collect garbage before returning
    del sim
    gc.collect()

    return sim_ic
def setUp(self):
    """Create the CUDA context and reset every fixture attribute to its default."""
    self.gpu_ctx = Common.CUDAContext()

    # Grid dimensions and cell sizes
    self.nx, self.ny = 50, 70
    self.dx, self.dy = 200.0, 200.0

    # Scalar simulation parameters
    self.dt = 1
    self.g = 9.81
    self.f = 0.0
    self.r = 0.0

    # Initial fields are left unset here
    self.h0 = self.eta0 = self.u0 = self.v0 = None

    self.T = 50.0
    self.boundaryConditions = None
    self.ghosts = [1, 1, 1, 1]
    self.arrayRange = None
    self.sim = None
def setUp(self):
    """Create the CUDA context, the initial-condition arrays and the comparison ranges."""
    self.gpu_ctx = Common.CUDAContext()

    # Grid dimensions and cell sizes
    self.nx, self.ny = 50, 70
    self.dx, self.dy = 200.0, 200.0

    # Scalar simulation parameters
    self.dt = 1
    self.g = 9.81
    self.f = 0.0
    self.r = 0.0
    self.A = 1

    # Array extents: the base grid padded by 2 in each direction; u0 has one
    # extra column and v0 one extra row.
    padded_ny = self.ny + 2
    padded_nx = self.nx + 2
    self.h0 = np.ones((padded_ny, padded_nx), dtype=np.float32) * 60
    self.eta0 = np.zeros((padded_ny, padded_nx), dtype=np.float32)
    self.u0 = np.zeros((padded_ny, self.nx + 1 + 2), dtype=np.float32)
    self.v0 = np.zeros((self.ny + 1 + 2, padded_nx), dtype=np.float32)

    # Ordered as north, east, south, west
    self.ghosts = [1, 1, 1, 1]

    self.etaRange = [-1, -1, 1, 1]
    self.uRange = [-1, -2, 1, 2]
    self.vRange = [-2, -1, 2, 1]

    # The reference ranges refer to the same list objects as the data ranges
    self.refEtaRange = self.etaRange
    self.refURange = self.uRange
    self.refVRange = self.vRange

    self.boundaryConditions = None
    self.T = 50.0
    self.sim = None
def setUp(self):
    """Create the CUDA context and clear the simulator handles used by the tests."""
    self.gpu_ctx = Common.CUDAContext()

    # No simulators exist yet
    self.sim = self.file_sim = None

    self.printall = True
def cuda_context_handler(self, line):
    """
    Register a Common.CUDAContext in the user namespace under the requested name.

    Parses `line` with magic_arguments (reads args.name, args.blocking and
    args.no_cache). If the name already exists in the user namespace nothing
    is done. Otherwise a new context is created and stored under that name,
    a custom exception handler is installed on the running IPython shell that
    resets the CUDA context stack whenever an Exception is raised, and an
    atexit hook is registered that pops all CUDA contexts on interpreter exit.
    """
    args = magic_arguments.parse_argstring(self.cuda_context_handler, line)
    self.logger = logging.getLogger(__name__)

    self.logger.info("Registering %s in user workspace", args.name)

    if args.name in self.shell.user_ns:
        self.logger.debug("Context already registered! Ignoring")
        return

    self.logger.debug("Creating context")
    use_cache = not args.no_cache
    self.shell.user_ns[args.name] = Common.CUDAContext(
        blocking=args.blocking, use_cache=use_cache)

    # Called by IPython whenever an Exception escapes a cell
    def custom_exc(shell, etype, evalue, tb, tb_offset=None):
        self.logger.exception(
            "Exception caught: Resetting to CUDA context %s", args.name)

        # Unwind the whole context stack. Identity test against None avoids
        # going through the context object's own comparison operators.
        while cuda.Context.get_current() is not None:
            context = cuda.Context.get_current()
            self.logger.info("Popping <%s>", str(context.handle))
            cuda.Context.pop()

        # Re-activate the registered context, if it is still in the namespace
        if args.name in self.shell.user_ns:
            self.logger.info(
                "Pushing <%s>",
                str(self.shell.user_ns[args.name].cuda_context.handle))
            self.shell.user_ns[args.name].cuda_context.push()
        else:
            self.logger.error(
                "No CUDA context called %s found (something is wrong)",
                args.name)
            self.logger.error("CUDA will not work now")

        self.logger.debug(
            "=================================================================="
        )

        # Still show the error within the notebook, don't just swallow it
        shell.showtraceback((etype, evalue, tb), tb_offset=tb_offset)

    # Install the handler for the whole current notebook
    get_ipython().set_custom_exc((Exception, ), custom_exc)

    # Handle CUDA context when exiting python
    import atexit

    def exitfunc():
        self.logger.info("Exitfunc: Resetting CUDA context stack")
        while cuda.Context.get_current() is not None:
            context = cuda.Context.get_current()
            self.logger.info("`-> Popping <%s>", str(context.handle))
            cuda.Context.pop()
        self.logger.debug(
            "=================================================================="
        )

    atexit.register(exitfunc)
def setUp(self):
    """Clear the fixture handles, create the CUDA context and start the ensemble."""
    # Handles are cleared before anything that can fail is attempted
    self.sim = self.ensemble = self.iewpf = None

    self.gpu_ctx = Common.CUDAContext()

    self.setUpAndStartEnsemble()
def setUp(self):
    """Create the CUDA context and stream, and reset all fixture parameters."""
    self.gpu_ctx = Common.CUDAContext()
    self.gpu_stream = cuda.Stream()

    # Grid dimensions and cell sizes
    self.nx, self.ny = 30, 40
    self.dx, self.dy = 7.0, 7.0

    # Scalar parameters
    self.f = 0.02
    self.g = 9.81
    self.beta = 0.0

    self.noise = None

    # Shape of the data arrays: grid plus ghost cells on both sides
    self.ghost_cells_x = 2
    self.ghost_cells_y = 2
    total_rows = self.ny + 2 * self.ghost_cells_y
    total_cols = self.nx + 2 * self.ghost_cells_x
    self.datashape = (total_rows, total_cols)

    # Extents used for the non-periodic case
    self.cutoff = 2
    extra_per_side = 2 + self.cutoff
    self.nx_nonPeriodic = self.nx + 2 * extra_per_side
    self.ny_nonPeriodic = self.ny + 2 * extra_per_side

    # Default configuration: non-staggered and periodic in both directions
    self.staggered = False
    self.periodicNS = True
    self.periodicEW = True

    # Total number of threads should be: 16, 32, 48, 64
    # Corresponding to the number of blocks: 1, 2, 3, 4
    self.glob_size_x = 3
    self.glob_size_y = 3
    self.glob_size_x_nonperiodic = 3
    self.glob_size_y_nonperiodic = 3
    self.glob_size_random_x = 1
    self.glob_size_random_x_nonperiodic = 2

    self.large_nx, self.large_ny = 400, 400
    self.large_noise = None

    self.floatMax = 2147483648.0

    # Fields are left unset here
    self.eta = self.hu = self.hv = self.H = None
def runBenchmark(simulator, sim_args, sim_ic, iterations, steps_per_download):
    """
    Run `iterations` rounds of simulate-then-download and print value ranges.

    Args:
        simulator: Callable invoked as simulator(gpu_ctx=..., **sim_args, **sim_ic);
            the returned object must provide step(t) and download().
        sim_args: Keyword arguments for the simulator; 'dt' is read to size each step.
        sim_ic: Additional keyword arguments (initial conditions) for the simulator.
        iterations: Number of step/download rounds to run.
        steps_per_download: Multiplier applied to sim_args['dt'] for each step call.

    Progress is printed to stdout, followed by the max and min of the three
    arrays returned by the last download.
    """
    print("Creating context", flush=True)
    cuda_context = Common.CUDAContext()

    # Initialize simulator
    print("Creating simulator", flush=True)
    sim = simulator(gpu_ctx=cuda_context, **sim_args, **sim_ic)

    # Run the simulation, downloading the state after every step call
    print("Simulating", flush=True)
    eta1 = u1 = v1 = None
    for _ in range(iterations):
        print(".", end='', flush=True)
        sim.step(steps_per_download * sim_args['dt'])
        eta1, u1, v1 = sim.download()
    print("", flush=True)

    # With iterations <= 0 the loop never downloads anything; fetch the
    # untouched state once so the summary below reports it instead of crashing.
    if eta1 is None:
        eta1, u1, v1 = sim.download()

    print("eta: [{:f}, {:f}]".format(np.max(eta1), np.min(eta1)))
    print("u: [{:f}, {:f}]".format(np.max(u1), np.min(u1)))
    print("v: [{:f}, {:f}]".format(np.max(v1), np.min(v1)))
def setUp(self):
    """Create the CUDA context, host test buffers and the device array under test."""
    self.gpu_ctx = Common.CUDAContext()

    # Small host-side grid with a halo in each direction
    self.nx, self.ny = 3, 5
    self.nx_halo, self.ny_halo = 1, 2
    rows = self.ny + 2 * self.ny_halo
    cols = self.nx + 2 * self.nx_halo
    self.dataShape = (rows, cols)

    # float32 buffers plus companions that use NumPy's default dtype
    self.buf1 = np.zeros(self.dataShape, dtype=np.float32, order='C')
    self.dbuf1 = np.zeros(self.dataShape)
    self.buf3 = np.zeros(self.dataShape, dtype=np.float32, order='C')
    self.dbuf3 = np.zeros(self.dataShape)

    # Fill every cell with a value derived from its (row, column) index; the
    # companion buffers receive the value as stored in the float32 buffer.
    for j, i in np.ndindex(*self.dataShape):
        self.buf1[j, i] = i * 100 + j
        self.dbuf1[j, i] = self.buf1[j, i]
        self.buf3[j, i] = j * 1000 - i
        self.dbuf3[j, i] = self.buf3[j, i]

    self.explicit_free = False

    self.device_name = self.gpu_ctx.cuda_device.name()
    self.gpu_stream = cuda.Stream()

    self.tests_failed = True

    self.cudaarray = Common.CUDAArray2D(
        self.gpu_stream,
        self.nx, self.ny,
        self.nx_halo, self.ny_halo,
        self.buf1)

    self.double_cudaarray = None
print("Making " + str(args.ensemble_size) + " ensemble members") # Import timing utilities import time tic = time.time() # Import packages we need import numpy as np from SWESimulators import CDKLM16, Common, DoubleJetCase toc = time.time() print("{:02.4f} s: ".format(toc - tic) + "Imported packages") # Create CUDA context tic = time.time() gpu_ctx = Common.CUDAContext() device_name = gpu_ctx.cuda_device.name() toc = time.time() print("{:02.4f} s: ".format(toc - tic) + "Created context on " + device_name) sim = None # # Initialize and spinup all ensemble members # for ensemble_member in range(args.ensemble_size): print("Creating ensemble member " + str(ensemble_member)) tic = time.time() # Generate parameters and initial conditions (which includes spin up time)
def setUp(self):
    """Run the parent class setUp, then create the CUDA context stored as self.gpu_ctx."""
    super(GPUDrifterTest, self).setUp()

    cuda_context = Common.CUDAContext()
    self.gpu_ctx = cuda_context
def setUp(self):
    """Run the parent class setUp, then create the CUDA context stored as self.gpu_ctx."""
    super(DrifterEnsembleTest, self).setUp()

    cuda_context = Common.CUDAContext()
    self.gpu_ctx = cuda_context
def __init__(self, comm, observation_file,
             observation_type=dautils.ObservationType.UnderlyingFlow,
             local_ensemble_size=None,
             sim_args=None, data_args=None, ensemble_args=None, metadata=None):
    """
    Initialize the ensemble.

    Only rank 0 should receive the optional arguments. The constructor
    handles initialization across nodes by broadcasting them from rank 0.

    Args:
        comm: MPI communicator; its rank, size and bcast are used.
        observation_file: File read via Observation.read_pickle (must not be None).
        observation_type: Forwarded to Observation.Observation.
        local_ensemble_size: Number of ensemble members per rank (rank 0's value wins).
        sim_args: Simulator arguments (rank 0's value is broadcast).
        data_args: Dict with at least 'nx', 'ny', 'dx', 'dy' (rank 0's value is broadcast).
        ensemble_args: Extra OceanModelEnsemble keyword arguments; on rank 0 it
            must contain 'observation_variance'.
        metadata: Extra key/value pairs written to the metadata JSON file on rank 0.

    Side effects on rank 0: creates the output directory and writes a
    metadata JSON file into it (including the git revision of this module's
    directory).
    """
    # None defaults replace shared mutable {} defaults: this constructor
    # writes into sim_args below, which would otherwise leak between calls.
    sim_args = {} if sim_args is None else sim_args
    data_args = {} if data_args is None else data_args
    ensemble_args = {} if ensemble_args is None else ensemble_args
    metadata = {} if metadata is None else metadata

    self.logger = logging.getLogger(__name__ + "_rank=" + str(comm.rank))
    self.logger.debug("Initializing")

    self.t = 0

    assert(observation_file is not None)
    # Only rank 0 is expected to hold the real ensemble_args before the
    # broadcast further down, so only rank 0 can be validated here.
    if comm.rank == 0:
        assert('observation_variance' in ensemble_args)

    #Broadcast general information about ensemble
    ##########################
    self.comm = comm
    self.num_nodes = self.comm.size
    assert self.comm.size >= 1, "You appear to not be using enough MPI nodes (at least one required)"

    self.local_ensemble_size = local_ensemble_size
    self.local_ensemble_size = self.comm.bcast(self.local_ensemble_size, root=0)

    # Ensure all particles in all processes use the same timestamp and write
    # to the same super dir (common for each EPS run)
    if (self.comm.rank == 0):
        # Sample the clock once so both timestamp strings describe the same instant
        now = datetime.datetime.now()
        self.timestamp = now.strftime("%Y_%m_%d-%H_%M_%S")
        self.timestamp_short = now.strftime("%Y_%m_%d")
        netcdf_filename = self.timestamp + ".nc"

        if "SLURM_JOB_ID" in os.environ:
            job_id = int(os.environ["SLURM_JOB_ID"])
            self.super_dir_name = "EPS_" + str(job_id) + "_" + self.timestamp
        else:
            self.super_dir_name = "EPS_" + self.timestamp
        os.makedirs(self.super_dir_name, exist_ok=True)

        # Write some useful metadata file
        write_metadata = {}
        import subprocess
        write_metadata["git_revision"] = subprocess.check_output(
            ['git', 'rev-parse', 'HEAD'],
            cwd=os.path.dirname(__file__)).strip().decode()
        write_metadata["id"] = self.super_dir_name
        write_metadata["timestamp"] = self.timestamp
        write_metadata["num_rank"] = self.comm.size
        write_metadata["local_ensemble_size"] = self.local_ensemble_size
        write_metadata["global_ensemble_size"] = self.local_ensemble_size * self.comm.size
        write_metadata["observation_variance"] = ensemble_args["observation_variance"]
        # Caller-supplied entries are applied last and may override the above
        write_metadata.update(metadata)

        metadata_file = os.path.join(self.super_dir_name,
                                     self.super_dir_name + "_metadata.json")
        with open(metadata_file, "w") as write_file:
            json.dump(write_metadata, write_file)
    else:
        self.timestamp = None
        self.timestamp_short = None
        netcdf_filename = None
        self.super_dir_name = None

    self.timestamp = self.comm.bcast(self.timestamp, root=0)
    self.timestamp_short = self.comm.bcast(self.timestamp_short, root=0)
    netcdf_filename = self.comm.bcast(netcdf_filename, root=0)
    self.super_dir_name = self.comm.bcast(self.super_dir_name, root=0)

    #Broadcast initial conditions for simulator
    ##########################
    self.sim_args = sim_args
    self.sim_args = self.comm.bcast(self.sim_args, root=0)

    # FIXME: Optimize: Use Bcast(...)
    self.data_args = data_args
    self.data_args = self.comm.bcast(self.data_args, root=0)
    self.data_shape = (self.data_args['ny'], self.data_args['nx'])

    #Broadcast arguments that we do not store in self
    ##############################
    ensemble_args = self.comm.bcast(ensemble_args, root=0)

    #Create ensemble on local node
    ##############################
    self.logger.info("Creating ensemble with %d members", self.local_ensemble_size)
    self.gpu_ctx = Common.CUDAContext(device=comm.rank)

    # DEBUG
    # NOTE(review): on the root rank bcast may hand back the caller's own
    # dict, in which case this key becomes visible to the caller - confirm.
    self.sim_args["comm"] = self.comm

    #Read observations from file
    self.observations = Observation.Observation(
        observation_type=observation_type,
        domain_size_x=self.data_args["nx"]*self.data_args["dx"],
        domain_size_y=self.data_args["ny"]*self.data_args["dy"],
        nx=self.data_args["nx"], ny=self.data_args["ny"],
        observation_variance=ensemble_args["observation_variance"])
    self.observations.read_pickle(observation_file)

    self.num_drifters = self.observations.get_num_drifters()

    self.ensemble = OceanModelEnsemble.OceanModelEnsemble(
        self.gpu_ctx, self.sim_args, self.data_args,
        self.local_ensemble_size,
        **ensemble_args,
        super_dir_name=self.super_dir_name,
        netcdf_filename=netcdf_filename,
        rank=self.comm.rank)

    self.perturbators = [None]*4
    self.num_perturbators = 4
    self._initPerturbators()

    # perturb the initial ensemble
    for particle_id in range(len(self.ensemble.particles)):
        self._perturbParticle(particle_id)

    # required when observing drifters
    self.Hm = self.ensemble.particles[0].downloadBathymetry(interior_domain_only=True)[1]
    assert(self.Hm.shape == (self.data_args["ny"], self.data_args["nx"])), 'Wrong size for self.Hm'
def __init__(self, comm, local_ensemble_size=None, drifter_positions=None,
             sim_args=None, sim_ic=None, sim_bc_args=None, ensemble_args=None):
    """
    Initialize the ensemble.

    Only rank 0 should receive the optional arguments. The constructor
    handles initialization across nodes by broadcasting them from rank 0.

    Args:
        comm: MPI communicator with at least two ranks (asserted).
        local_ensemble_size: Number of members on each non-root rank
            (rank 0 creates a single member).
        drifter_positions: Drifter positions forwarded to OceanModelEnsemble.
        sim_args: Simulator arguments; 'nx' and 'ny' are read for the data shape.
        sim_ic: Initial conditions with float32 arrays 'H', 'eta0', 'hu0', 'hv0'
            on rank 0; 'H' is one larger than the others in each dimension.
        sim_bc_args: Keyword arguments for Common.BoundaryConditions.
        ensemble_args: Extra keyword arguments for OceanModelEnsemble.
    """
    # None defaults replace shared mutable defaults: sim_ic in particular is
    # written to below, which would otherwise leak state between calls.
    drifter_positions = [] if drifter_positions is None else drifter_positions
    sim_args = {} if sim_args is None else sim_args
    sim_ic = {} if sim_ic is None else sim_ic
    sim_bc_args = {} if sim_bc_args is None else sim_bc_args
    ensemble_args = {} if ensemble_args is None else ensemble_args

    self.logger = logging.getLogger(__name__ + "_rank=" + str(comm.rank))
    self.logger.debug("Initializing")

    #Broadcast general information about ensemble
    ##########################
    self.comm = comm
    self.num_nodes = self.comm.size - 1  #Root does not participate
    assert self.comm.size >= 2, "You appear to not be using enough MPI nodes (at least two required)"

    self.local_ensemble_size = local_ensemble_size
    self.local_ensemble_size = self.comm.bcast(self.local_ensemble_size, root=0)

    self.num_drifters = len(drifter_positions)
    self.num_drifters = self.comm.bcast(self.num_drifters, root=0)

    #Broadcast initial conditions for simulator
    ##########################
    self.sim_args = sim_args
    self.sim_args = self.comm.bcast(self.sim_args, root=0)

    #FIXME: hardcoded for CDKLM four ghost cells
    self.data_shape = (self.sim_args['ny'] + 4, self.sim_args['nx'] + 4)
    bathymetry_shape = (self.data_shape[0] + 1, self.data_shape[1] + 1)

    if (self.comm.rank == 0):
        # Root holds the real data: check dtypes and shapes before sending
        assert (sim_ic['H'].dtype == np.float32)
        assert (sim_ic['eta0'].dtype == np.float32)
        assert (sim_ic['hu0'].dtype == np.float32)
        assert (sim_ic['hv0'].dtype == np.float32)

        assert (sim_ic['H'].shape == bathymetry_shape)
        assert (sim_ic['eta0'].shape == self.data_shape)
        assert (sim_ic['hu0'].shape == self.data_shape)
        assert (sim_ic['hv0'].shape == self.data_shape)
    else:
        # Other ranks allocate receive buffers of matching shape and dtype
        sim_ic['H'] = np.empty(bathymetry_shape, dtype=np.float32)
        sim_ic['eta0'] = np.empty(self.data_shape, dtype=np.float32)
        sim_ic['hu0'] = np.empty(self.data_shape, dtype=np.float32)
        sim_ic['hv0'] = np.empty(self.data_shape, dtype=np.float32)

    #FIXME: Optimize this to one transfer by packing arrays?
    self.comm.Bcast(sim_ic['H'], root=0)
    self.comm.Bcast(sim_ic['eta0'], root=0)
    self.comm.Bcast(sim_ic['hu0'], root=0)
    self.comm.Bcast(sim_ic['hv0'], root=0)

    # Pass the array itself so it is only formatted when debug logging is on
    self.logger.debug("eta0 is %s", sim_ic['eta0'])

    #Broadcast arguments that we do not store in self
    ##############################
    ensemble_args = self.comm.bcast(ensemble_args, root=0)
    sim_bc_args = self.comm.bcast(sim_bc_args, root=0)
    sim_ic['boundary_conditions'] = Common.BoundaryConditions(**sim_bc_args)

    #Create ensemble on local node
    ##############################
    self.logger.info("Creating ensemble with %d members", self.local_ensemble_size)
    self.gpu_ctx = Common.CUDAContext()

    # Rank 0 creates a single member; every other rank creates a full local ensemble
    if (self.comm.rank == 0):
        num_ensemble_members = 1
    else:
        num_ensemble_members = self.local_ensemble_size

    self.ensemble = OceanModelEnsemble.OceanModelEnsemble(
        self.gpu_ctx, self.sim_args, sim_ic, num_ensemble_members,
        drifter_positions=drifter_positions,
        **ensemble_args)