def chunked_rwg(source, destination, weight, nchunks_dst, merge, esmf_src_type, esmf_dst_type, genweights,
                esmf_regrid_method, spatial_subset, src_resolution, dst_resolution, buffer_distance, wd, persist,
                eager, ignore_degenerate, data_variables, spatial_subset_path, verbose, loglvl, weightfilemode):
    """Chunked regrid weight generation driver.

    Chunks the destination grid, optionally spatially subsets the source with the destination, generates ESMF
    regridding weights per chunk, and optionally merges the per-chunk weight files into one global weight file.
    Runs under MPI: the ``ocgis.vm`` broadcast/gather/barrier/scoped calls below are collective, so every rank
    must reach them in the same order.

    NOTE(review): the flag-style names in the error messages (``--merge``, ``--genweights``, ``--no_persist``)
    suggest this backs a command-line interface — confirm against the caller.

    :param source: Path to the source grid file.
    :param destination: Path to the destination grid file.
    :param weight: Output weight file path. Required when merging, or when generating weights with a spatial subset.
    :param nchunks_dst: Destination chunking decomposition as a comma-separated string (e.g. ``'10,1'``) or ``None``.
    :param merge: If true, merge the per-chunk weight files into a single global weight file.
    :param esmf_src_type: ESMF type descriptor for the source, forwarded to ``_create_request_dataset_``.
    :param esmf_dst_type: ESMF type descriptor for the destination, forwarded to ``_create_request_dataset_``.
    :param genweights: If true, generate ESMF weights.
    :param esmf_regrid_method: ESMF regrid method name, forwarded to ``GridChunker`` via ``esmf_kwargs``.
    :param spatial_subset: If true, write a spatial subset of the source instead of chunking.
    :param src_resolution: Optional source grid resolution, forwarded to ``GridChunker`` and the subset writer.
    :param dst_resolution: Optional destination grid resolution, forwarded to ``GridChunker``.
    :param buffer_distance: Buffer value forwarded to ``GridChunker``.
    :param wd: Working directory. If ``None``, a temporary directory is created; otherwise it must not exist.
    :param persist: If false, the working directory is removed on success.
    :param eager: Forwarded to ``GridChunker``.
    :param ignore_degenerate: Forwarded to ESMF via ``esmf_kwargs``.
    :param data_variables: Optional comma-separated data variable names for the source request dataset.
    :param spatial_subset_path: Optional output path for the spatial subset file.
    :param verbose: If true, configure streaming log output at ``loglvl``.
    :param loglvl: Logging level name; must be an attribute of :mod:`logging` (e.g. ``'INFO'``).
    :param weightfilemode: Forwarded to ``GridChunker.write_esmf_weights`` as ``filemode``.
    :return: ``0`` on success.
    """
    # Snapshot the call arguments before any are mutated below; used for the history attribute written to the
    # output weight file.
    the_locals = locals()

    if verbose:
        ocgis_lh.configure(to_stream=True, level=getattr(logging, loglvl))

    ocgis_lh(msg="Starting Chunked Regrid Weight Generation", level=logging.INFO, logger=CRWG_LOG)

    if not ocgis.env.USE_NETCDF4_MPI:
        msg = ('env.USE_NETCDF4_MPI is False. Considerable performance gains are possible if this is True. Is '
               'netCDF4-python built with parallel support?')
        ocgis_lh(msg, level=logging.WARN, logger=CRWG_LOG, force=True)

    # Comma-separated string -> list of variable names.
    if data_variables is not None:
        data_variables = data_variables.split(',')

    if nchunks_dst is not None:
        # Format the chunking decomposition from its string representation.
        if ',' in nchunks_dst:
            nchunks_dst = nchunks_dst.split(',')
        else:
            nchunks_dst = [nchunks_dst]
        nchunks_dst = tuple([int(ii) for ii in nchunks_dst])
    if merge:
        if not spatial_subset and weight is None:
            raise ValueError('"weight" must be a valid path if --merge')
    if spatial_subset and genweights and weight is None:
        raise ValueError('"weight" must be a valid path if --genweights')

    # Make a temporary working directory if one is not provided by the client. Only do this if we are writing subsets
    # and it is not a merge only operation.
    should_create_wd = (nchunks_dst is None or not all([ii == 1 for ii in nchunks_dst])) or spatial_subset
    if should_create_wd:
        if wd is None:
            # Root rank creates the temporary directory; the path is broadcast so all ranks share it.
            if ocgis.vm.rank == 0:
                wd = tempfile.mkdtemp(prefix='ocgis_chunked_rwg_')
            wd = ocgis.vm.bcast(wd)
        else:
            # Only root touches the filesystem; the outcome is broadcast so that any failure is raised on every
            # rank collectively instead of stranding the non-root ranks.
            exc = None
            if ocgis.vm.rank == 0:
                # The working directory must not exist to proceed.
                if nchunks_dst is not None:
                    if os.path.exists(wd):
                        exc = ValueError("Working directory {} must not exist.".format(wd))
                    else:
                        # Make the working directory nesting as needed.
                        os.makedirs(wd)
            exc = ocgis.vm.bcast(exc)
            if exc is not None:
                raise exc

        # Guard against the merged weight file living inside the (potentially removed) working directory.
        if merge and not spatial_subset or (spatial_subset and genweights):
            if _is_subdir_(wd, weight):
                raise ValueError('Merge weight file path must not in the working directory. It may get unintentionally deleted with the --no_persist flag.')

    # Create the source and destination request datasets.
    rd_src = _create_request_dataset_(source, esmf_src_type, data_variables=data_variables)
    rd_dst = _create_request_dataset_(destination, esmf_dst_type)

    # Execute a spatial subset if requested.
    paths = None
    if spatial_subset:
        if spatial_subset_path is None:
            spatial_subset_path = os.path.join(wd, 'spatial_subset.nc')
        msg = "Executing spatial subset. Output path is: {}".format(spatial_subset_path)
        ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
        _write_spatial_subset_(rd_src, rd_dst, spatial_subset_path, src_resmax=src_resolution)
    # Only split grids if a spatial subset is not requested.
    else:
        # Update the paths to use for the grid.
        paths = {'wd': wd}

    # Arguments to ESMF regridding.
    esmf_kwargs = {'regrid_method': esmf_regrid_method, 'ignore_degenerate': ignore_degenerate}

    # Create the chunked regridding object. This is used for both chunked regridding and a regrid with a spatial subset.
    gs = GridChunker(rd_src, rd_dst, nchunks_dst=nchunks_dst, src_grid_resolution=src_resolution, paths=paths,
                     dst_grid_resolution=dst_resolution, buffer_value=buffer_distance, redistribute=True,
                     genweights=genweights, esmf_kwargs=esmf_kwargs, use_spatial_decomp='auto', eager=eager)

    # Write subsets and generate weights if requested in the grid splitter.
    # TODO: Need a weight only option. If chunks are written, then weights are written...
    if not spatial_subset and nchunks_dst is not None and not gs.is_one_chunk:
        msg = "Starting main chunking loop..."
        ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
        gs.write_chunks()
    else:
        # Single-chunk or spatial-subset path: weights (if requested) are written in one shot.
        if spatial_subset:
            source = spatial_subset_path
        if genweights:
            msg = "Writing ESMF weights..."
            ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
            handle_weight_file_check(weight)
            gs.write_esmf_weights(source, destination, weight, filemode=weightfilemode)

    # Create the global weight file. This does not apply to spatial subsets because there will always be one weight
    # file.
    if merge and not spatial_subset and not gs.is_one_chunk:
        # Weight file merge only works in serial.
        exc = None
        with ocgis.vm.scoped('weight file merge', [0]):
            if not ocgis.vm.is_null:
                msg = "Merging chunked weight files to global file. Output global weight file is: {}".format(weight)
                ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
                handle_weight_file_check(weight)
                gs.create_merged_weight_file(weight)
        # Collective exception propagation: every rank re-raises any failure gathered from the merge.
        excs = ocgis.vm.gather(exc)
        excs = ocgis.vm.bcast(excs)
        for exc in excs:
            if exc is not None:
                raise exc
        ocgis.vm.barrier()

    # Append the history string if there is an output weight file.
    if weight and ocgis.vm.rank == 0:
        if os.path.exists(weight):
            # Add some additional stuff for record keeping
            import getpass
            import socket
            # NOTE(review): datetime appears unused in this block — confirm before removing.
            import datetime
            with nc.Dataset(weight, 'a') as ds:
                ds.setncattr('created_by_user', getpass.getuser())
                ds.setncattr('created_on_hostname', socket.getfqdn())
                ds.setncattr('history', create_history_string(the_locals))
    ocgis.vm.barrier()

    # Remove the working directory unless the persist flag is provided.
    if not persist:
        if ocgis.vm.rank == 0:
            msg = "Removing working directory since persist is False."
            ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
            shutil.rmtree(wd)
        ocgis.vm.barrier()

    ocgis_lh(msg="Success!", level=logging.INFO, logger=CRWG_LOG)
    return 0
def chunked_rwg(source, destination, weight, nchunks_dst, merge, esmf_src_type, esmf_dst_type, genweights,
                esmf_regrid_method, spatial_subset, src_resolution, dst_resolution, buffer_distance, wd, persist,
                eager, ignore_degenerate):
    """Chunked regrid weight generation driver.

    Chunks the destination grid, optionally spatially subsets the source with the destination, generates ESMF
    regridding weights per chunk, and optionally merges the per-chunk weight files into one global weight file.
    Runs under MPI: the ``ocgis.vm`` broadcast/gather/barrier/scoped calls below are collective, so every rank
    must reach them in the same order.

    :param source: Path to the source grid file.
    :param destination: Path to the destination grid file.
    :param weight: Output weight file path. Required when merging, or when generating weights with a spatial subset.
    :param nchunks_dst: Destination chunking decomposition as a comma-separated string (e.g. ``'10,1'``) or ``None``.
    :param merge: If true, merge the per-chunk weight files into a single global weight file.
    :param esmf_src_type: ESMF type descriptor for the source, forwarded to ``_create_request_dataset_``.
    :param esmf_dst_type: ESMF type descriptor for the destination, forwarded to ``_create_request_dataset_``.
    :param genweights: If true, generate ESMF weights.
    :param esmf_regrid_method: ESMF regrid method name, forwarded to ``GridChunker`` via ``esmf_kwargs``.
    :param spatial_subset: If true, write a spatial subset of the source instead of chunking.
    :param src_resolution: Optional source grid resolution, forwarded to ``GridChunker``.
    :param dst_resolution: Optional destination grid resolution, forwarded to ``GridChunker``.
    :param buffer_distance: Buffer value forwarded to ``GridChunker``.
    :param wd: Working directory. If ``None``, a temporary directory is created; otherwise it must not exist.
    :param persist: If false, the working directory is removed on success.
    :param eager: Forwarded to ``GridChunker``.
    :param ignore_degenerate: Forwarded to ESMF via ``esmf_kwargs``.
    :return: ``0`` on success.
    """
    if not ocgis.env.USE_NETCDF4_MPI:
        msg = ('env.USE_NETCDF4_MPI is False. Considerable performance gains are possible if this is True. Is '
               'netCDF4-python built with parallel support?')
        ocgis_lh(msg, level=logging.WARN, logger='ocli.chunked_rwg', force=True)

    if nchunks_dst is not None:
        # Format the chunking decomposition from its string representation.
        if ',' in nchunks_dst:
            nchunks_dst = nchunks_dst.split(',')
        else:
            nchunks_dst = [nchunks_dst]
        nchunks_dst = tuple([int(ii) for ii in nchunks_dst])
    if merge:
        if not spatial_subset and weight is None:
            raise ValueError('"weight" must be a valid path if --merge')
    if spatial_subset and genweights and weight is None:
        raise ValueError('"weight" must be a valid path if --genweights')

    # Make a temporary working directory if one is not provided by the client. Only do this if we are writing subsets
    # and it is not a merge only operation.
    if wd is None:
        # Root rank creates the temporary directory; the path is broadcast so all ranks share it.
        if ocgis.vm.rank == 0:
            wd = tempfile.mkdtemp(prefix='ocgis_chunked_rwg_')
        wd = ocgis.vm.bcast(wd)
    else:
        # BUGFIX: only root touches the filesystem, but the outcome must be shared with every rank. Previously a
        # failure raised on rank 0 alone, leaving the other ranks deadlocked at the collective barrier. Broadcast
        # the exception and raise it collectively instead (the broadcast also synchronizes the ranks, so no
        # explicit barrier is required here).
        exc = None
        if ocgis.vm.rank == 0:
            # The working directory must not exist to proceed.
            if os.path.exists(wd):
                # BUGFIX: report the actual offending path instead of the literal string 'wd'.
                exc = ValueError("Working directory {} must not exist.".format(wd))
            else:
                # Make the working directory nesting as needed.
                os.makedirs(wd)
        exc = ocgis.vm.bcast(exc)
        if exc is not None:
            raise exc

    # Guard against the merged weight file living inside the (potentially removed) working directory.
    if merge and not spatial_subset or (spatial_subset and genweights):
        if _is_subdir_(wd, weight):
            raise ValueError('Merge weight file path must not be in the working directory. It may get unintentionally deleted with the --no_persist flag.')

    # Create the source and destination request datasets.
    rd_src = _create_request_dataset_(source, esmf_src_type)
    rd_dst = _create_request_dataset_(destination, esmf_dst_type)

    # Execute a spatial subset if requested.
    paths = None
    if spatial_subset:
        # TODO: This path should be customizable.
        spatial_subset_path = os.path.join(wd, 'spatial_subset.nc')
        _write_spatial_subset_(rd_src, rd_dst, spatial_subset_path)
    # Only split grids if a spatial subset is not requested.
    else:
        # Update the paths to use for the grid.
        paths = {'wd': wd}

    # Arguments to ESMF regridding.
    esmf_kwargs = {'regrid_method': esmf_regrid_method, 'ignore_degenerate': ignore_degenerate}

    # Create the chunked regridding object. This is used for both chunked regridding and a regrid with a spatial subset.
    gs = GridChunker(rd_src, rd_dst, nchunks_dst=nchunks_dst, src_grid_resolution=src_resolution, paths=paths,
                     dst_grid_resolution=dst_resolution, buffer_value=buffer_distance, redistribute=True,
                     genweights=genweights, esmf_kwargs=esmf_kwargs, use_spatial_decomp='auto', eager=eager)

    # Write subsets and generate weights if requested in the grid splitter.
    # TODO: Need a weight only option. If chunks are written, then weights are written...
    if not spatial_subset and nchunks_dst is not None:
        gs.write_chunks()
    else:
        # Single-shot path: weights (if requested) are written directly.
        if spatial_subset:
            source = spatial_subset_path
        if genweights:
            gs.write_esmf_weights(source, destination, weight)

    # Create the global weight file. This does not apply to spatial subsets because there will always be one weight
    # file.
    if merge and not spatial_subset:
        # Weight file merge only works in serial.
        exc = None
        with ocgis.vm.scoped('weight file merge', [0]):
            if not ocgis.vm.is_null:
                gs.create_merged_weight_file(weight)
        # Collective exception propagation: every rank re-raises any failure gathered from the merge.
        excs = ocgis.vm.gather(exc)
        excs = ocgis.vm.bcast(excs)
        for exc in excs:
            if exc is not None:
                raise exc
        ocgis.vm.barrier()

    # Remove the working directory unless the persist flag is provided.
    if not persist:
        if ocgis.vm.rank == 0:
            shutil.rmtree(wd)
        ocgis.vm.barrier()

    return 0
def chunked_rwg(source, destination, weight, nchunks_dst, merge, esmf_src_type, esmf_dst_type, genweights,
                esmf_regrid_method, spatial_subset, src_resolution, dst_resolution, buffer_distance, wd, persist,
                eager, ignore_degenerate, data_variables, spatial_subset_path, verbose, loglvl):
    """Chunked regrid weight generation driver.

    Chunks the destination grid, optionally spatially subsets the source with the destination, generates ESMF
    regridding weights per chunk, and optionally merges the per-chunk weight files into one global weight file.
    Runs under MPI: the ``ocgis.vm`` broadcast/gather/barrier/scoped calls below are collective, so every rank
    must reach them in the same order.

    :param source: Path to the source grid file.
    :param destination: Path to the destination grid file.
    :param weight: Output weight file path. Required when merging, or when generating weights with a spatial subset.
    :param nchunks_dst: Destination chunking decomposition as a comma-separated string (e.g. ``'10,1'``) or ``None``.
    :param merge: If true, merge the per-chunk weight files into a single global weight file.
    :param esmf_src_type: ESMF type descriptor for the source, forwarded to ``_create_request_dataset_``.
    :param esmf_dst_type: ESMF type descriptor for the destination, forwarded to ``_create_request_dataset_``.
    :param genweights: If true, generate ESMF weights.
    :param esmf_regrid_method: ESMF regrid method name, forwarded to ``GridChunker`` via ``esmf_kwargs``.
    :param spatial_subset: If true, write a spatial subset of the source instead of chunking.
    :param src_resolution: Optional source grid resolution, forwarded to ``GridChunker`` and the subset writer.
    :param dst_resolution: Optional destination grid resolution, forwarded to ``GridChunker``.
    :param buffer_distance: Buffer value forwarded to ``GridChunker``.
    :param wd: Working directory. If ``None``, a temporary directory is created; otherwise it must not exist.
    :param persist: If false, the working directory is removed on success.
    :param eager: Forwarded to ``GridChunker``.
    :param ignore_degenerate: Forwarded to ESMF via ``esmf_kwargs``.
    :param data_variables: Optional comma-separated data variable names for the source request dataset.
    :param spatial_subset_path: Optional output path for the spatial subset file.
    :param verbose: If true, configure streaming log output at ``loglvl``.
    :param loglvl: Logging level name; must be an attribute of :mod:`logging` (e.g. ``'INFO'``).
    :return: ``0`` on success.
    """
    if verbose:
        ocgis_lh.configure(to_stream=True, level=getattr(logging, loglvl))

    ocgis_lh(msg="Starting Chunked Regrid Weight Generation", level=logging.INFO, logger=CRWG_LOG)

    if not ocgis.env.USE_NETCDF4_MPI:
        msg = ('env.USE_NETCDF4_MPI is False. Considerable performance gains are possible if this is True. Is '
               'netCDF4-python built with parallel support?')
        ocgis_lh(msg, level=logging.WARN, logger=CRWG_LOG, force=True)

    # Comma-separated string -> list of variable names.
    if data_variables is not None:
        data_variables = data_variables.split(',')

    if nchunks_dst is not None:
        # Format the chunking decomposition from its string representation.
        if ',' in nchunks_dst:
            nchunks_dst = nchunks_dst.split(',')
        else:
            nchunks_dst = [nchunks_dst]
        nchunks_dst = tuple([int(ii) for ii in nchunks_dst])
    if merge:
        if not spatial_subset and weight is None:
            raise ValueError('"weight" must be a valid path if --merge')
    if spatial_subset and genweights and weight is None:
        raise ValueError('"weight" must be a valid path if --genweights')

    # Make a temporary working directory if one is not provided by the client. Only do this if we are writing subsets
    # and it is not a merge only operation.
    if wd is None:
        # Root rank creates the temporary directory; the path is broadcast so all ranks share it.
        if ocgis.vm.rank == 0:
            wd = tempfile.mkdtemp(prefix='ocgis_chunked_rwg_')
        wd = ocgis.vm.bcast(wd)
    else:
        # Only root touches the filesystem; the outcome is broadcast so that any failure is raised on every rank
        # collectively instead of stranding the non-root ranks.
        exc = None
        if ocgis.vm.rank == 0:
            # The working directory must not exist to proceed.
            if os.path.exists(wd):
                exc = ValueError("Working directory {} must not exist.".format(wd))
            else:
                # Make the working directory nesting as needed.
                os.makedirs(wd)
        exc = ocgis.vm.bcast(exc)
        if exc is not None:
            raise exc

    # Guard against the merged weight file living inside the (potentially removed) working directory.
    if merge and not spatial_subset or (spatial_subset and genweights):
        if _is_subdir_(wd, weight):
            raise ValueError('Merge weight file path must not in the working directory. It may get unintentionally deleted with the --no_persist flag.')

    # Create the source and destination request datasets.
    rd_src = _create_request_dataset_(source, esmf_src_type, data_variables=data_variables)
    rd_dst = _create_request_dataset_(destination, esmf_dst_type)

    # Execute a spatial subset if requested.
    paths = None
    if spatial_subset:
        if spatial_subset_path is None:
            spatial_subset_path = os.path.join(wd, 'spatial_subset.nc')
        msg = "Executing spatial subset. Output path is: {}".format(spatial_subset_path)
        ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
        _write_spatial_subset_(rd_src, rd_dst, spatial_subset_path, src_resmax=src_resolution)
    # Only split grids if a spatial subset is not requested.
    else:
        # Update the paths to use for the grid.
        paths = {'wd': wd}

    # Arguments to ESMF regridding.
    esmf_kwargs = {'regrid_method': esmf_regrid_method, 'ignore_degenerate': ignore_degenerate}

    # Create the chunked regridding object. This is used for both chunked regridding and a regrid with a spatial subset.
    gs = GridChunker(rd_src, rd_dst, nchunks_dst=nchunks_dst, src_grid_resolution=src_resolution, paths=paths,
                     dst_grid_resolution=dst_resolution, buffer_value=buffer_distance, redistribute=True,
                     genweights=genweights, esmf_kwargs=esmf_kwargs, use_spatial_decomp='auto', eager=eager)

    # Write subsets and generate weights if requested in the grid splitter.
    # TODO: Need a weight only option. If chunks are written, then weights are written...
    if not spatial_subset and nchunks_dst is not None:
        msg = "Starting main chunking loop..."
        ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
        gs.write_chunks()
    else:
        # Single-shot path: weights (if requested) are written directly.
        if spatial_subset:
            source = spatial_subset_path
        if genweights:
            msg = "Writing ESMF weights..."
            ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
            gs.write_esmf_weights(source, destination, weight)

    # Create the global weight file. This does not apply to spatial subsets because there will always be one weight
    # file.
    if merge and not spatial_subset:
        # Weight file merge only works in serial.
        exc = None
        with ocgis.vm.scoped('weight file merge', [0]):
            if not ocgis.vm.is_null:
                msg = "Merging chunked weight files to global file. Output global weight file is: {}".format(weight)
                ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
                gs.create_merged_weight_file(weight)
        # Collective exception propagation: every rank re-raises any failure gathered from the merge.
        excs = ocgis.vm.gather(exc)
        excs = ocgis.vm.bcast(excs)
        for exc in excs:
            if exc is not None:
                raise exc
        ocgis.vm.barrier()

    # Remove the working directory unless the persist flag is provided.
    if not persist:
        if ocgis.vm.rank == 0:
            msg = "Removing working directory since persist is False."
            ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
            shutil.rmtree(wd)
        ocgis.vm.barrier()

    ocgis_lh(msg="Success!", level=logging.INFO, logger=CRWG_LOG)
    return 0
def test_system_negative_values_in_spherical_grid(self):
    """Chunked weight generation should agree with single-shot weights for spherical grids with negative longitudes."""
    start_dir = os.getcwd()
    try:
        # Three longitude conventions: negative-start, conventional 0-360, and a global 5-degree grid.
        lon_negative = np.arange(-10, 350, step=10, dtype=float)
        lon_standard = np.arange(0, 360, step=10, dtype=float)
        lat_values = np.arange(-90, 100, step=10, dtype=float)

        lon_var_negative = Variable("lon", lon_negative, dimensions=["lon"])
        lon_var_standard = Variable("lon", lon_standard, dimensions=["lon"])
        lat_var = Variable("lat", lat_values, dimensions=["lat"])

        grid_negative = Grid(x=lon_var_negative.copy(), y=lat_var.copy(), crs=Spherical())
        grid_standard = Grid(x=lon_var_standard.copy(), y=lat_var.copy(), crs=Spherical())
        grid_global = create_gridxy_global(5, with_bounds=False, crs=Spherical())
        all_grids = [grid_negative, grid_standard, grid_global]

        # Exercise every source/destination pairing of the three grids.
        for grid_ctr, (src, dst) in enumerate(itertools.product(all_grids, all_grids)):
            os.chdir(self.current_dir_output)
            gdirname = "grid-ctr-{}".format(grid_ctr)
            self.dprint(gdirname)
            griddir = os.path.join(self.current_dir_output, gdirname)
            os.mkdir(gdirname)
            os.chdir(gdirname)
            srcgridname = "gridn.nc"
            src.parent.write(srcgridname)
            dstgridname = "grid.nc"
            dst.parent.write(dstgridname)

            # Decompositions to try; (1, 1) is the single-chunk case that produces the global reference file.
            chunk_decomps = [(4, 1), (3, 1), (2, 1), (1, 1)]
            for chunk_ctr, decomp in enumerate(chunk_decomps):
                os.chdir(griddir)
                dirname = 'ctr-{}'.format(chunk_ctr)
                os.mkdir(dirname)
                os.chdir(dirname)
                wd = os.getcwd()
                self.dprint("current chunks", decomp)
                chunker = GridChunker(src, dst, nchunks_dst=decomp, genweights=True, paths={'wd': wd},
                                      esmf_kwargs={'regrid_method': 'BILINEAR'})
                if chunker.is_one_chunk:
                    # Single chunk: write the reference global weight file directly.
                    chunker.write_esmf_weights(os.path.join(griddir, srcgridname),
                                               os.path.join(griddir, dstgridname),
                                               os.path.join(griddir, "global-weights.nc"))
                else:
                    # Multiple chunks: write per-chunk weights, then merge them.
                    chunker.write_chunks()
                    merged_path = os.path.join(griddir, "ctr-{}".format(chunk_ctr), "merged-weights.nc")
                    chunker.create_merged_weight_file(merged_path)

            os.chdir(griddir)
            # Every merged (multi-chunk) weight file must be equivalent to the single-chunk global file.
            global_weights = os.path.join(griddir, "global-weights.nc")
            for check_ctr in range(0, len(chunk_decomps) - 1):
                merged_weights = os.path.join(griddir, "ctr-{}".format(check_ctr), "merged-weights.nc")
                self.assertWeightFilesEquivalent(merged_weights, global_weights)
    finally:
        os.chdir(start_dir)