def apply_by_spatial_chunk(src_filename, dst_filename, nchunks, chunk_idx, **kwargs):
    """
    Load one spatial chunk from source and destination CF-Grid NetCDF files.

    Both files are wrapped in request datasets and handed to a
    :class:`~ocgis.spatial.grid_chunker.GridChunker`. The source and destination grids yielded for ``chunk_idx``
    are each converted to a :class:`xarray.Dataset`. See :class:`~ocgis.spatial.grid_chunker.GridChunker` for more
    documentation on the spatial chunking.

    :param str src_filename: Path to source NetCDF file.
    :param str dst_filename: Path to destination NetCDF file.
    :param nchunks: The chunking decomposition for the destination grid. See
     :class:`~ocgis.spatial.grid_chunker.GridChunker`.
    :type nchunks: tuple(int, ...)
    :param int chunk_idx: The target chunk index.
    :param kwargs: Extra keyword arguments to :class:`~ocgis.spatial.grid_chunker.GridChunker` initialization.
    :returns: ``0`` if a chunk was yielded and converted, ``1`` if the chunker yielded nothing.
    :rtype: int
    """
    chunker = GridChunker(ocgis.RequestDataset(src_filename), ocgis.RequestDataset(dst_filename),
                          nchunks_dst=nchunks, **kwargs)
    chunk_iterator = chunker.iter_src_grid_subsets(yield_dst=True, yield_idx=chunk_idx)

    # Stays at the failure code unless the iterator yields the requested chunk.
    status = 1
    for position, (src_grid, src_slice, dst_grid, dst_slice) in enumerate(chunk_iterator):
        xsrc = src_grid.parent.to_xarray(decode_cf=False)
        xdst = dst_grid.parent.to_xarray(decode_cf=False)
        status = 0
        # Requesting a single chunk index should produce exactly one iteration.
        assert position == 0
    return status
def test_create_merged_weight_file_unstructured(self):
    """Merged chunk weights for a UGRID source should be equivalent to a global ESMF weight file."""
    import ESMF

    # Source: an isomorphic UGRID file loaded with a point abstraction.
    ugrid_path = self.get_temporary_file_path('ugrid.nc')
    ugrid_resolution = 10.
    self.fixture_regular_ugrid_file(ugrid_path, ugrid_resolution, crs=Spherical())
    source_rd = RequestDataset(ugrid_path, driver=DriverNetcdfUGRID, grid_abstraction='point')
    source_grid = source_rd.get().grid
    self.assertEqual(source_grid.abstraction, 'point')

    # Destination: a logically rectangular grid, written to disk because ESMF reads it by filename below.
    destination_grid = self.get_gridxy_global(resolution=20., crs=Spherical())
    destination_path = self.get_temporary_file_path('dst.nc')
    destination_grid.parent.write(destination_path)

    # Chunk the grids and generate per-chunk weights.
    chunker = GridChunker(source_grid, destination_grid, (3, 3), check_contains=False, src_grid_resolution=10.,
                          paths=self.fixture_paths, genweights=True)
    chunker.write_chunks()

    # Combine the per-chunk weight files into a single file.
    merged_path = self.get_temporary_file_path('merged_weight_file.nc')
    chunker.create_merged_weight_file(merged_path)

    # Reference: let ESMF generate the weights for the full grids in one pass.
    global_path = self.get_temporary_file_path('global_weights.nc')
    esmf_src = ESMF.Mesh(filename=ugrid_path, filetype=ESMF.FileFormat.UGRID,
                         meshname=VariableName.UGRID_HOST_VARIABLE)
    esmf_dst = ESMF.Grid(filename=destination_path, filetype=ESMF.FileFormat.GRIDSPEC, add_corner_stagger=True)
    esmf_src_field = ESMF.Field(grid=esmf_src, meshloc=ESMF.MeshLoc.ELEMENT)
    esmf_dst_field = ESMF.Field(grid=esmf_dst)
    regrid_handle = ESMF.Regrid(srcfield=esmf_src_field, dstfield=esmf_dst_field, filename=global_path,
                                regrid_method=ESMF.RegridMethod.CONSERVE)

    # The merged file and the global file must describe equivalent weights.
    self.assertWeightFilesEquivalent(global_path, merged_path)
def run_system_splitting_unstructured(self, genweights):
    """
    Chunk an unstructured point source grid against a rectangular destination grid and check the first source
    chunk file contains the source index GUID variable.

    :param genweights: Forwarded to :class:`GridChunker` as its ``genweights`` argument.
    """
    env.CLOBBER_UNITS_ON_BOUNDS = False

    # Source: a regular UGRID file loaded with a point abstraction.
    ugrid_path = self.get_temporary_file_path('ugrid.nc')
    ugrid_resolution = 10.
    self.fixture_regular_ugrid_file(ugrid_path, ugrid_resolution)
    source_rd = RequestDataset(ugrid_path, driver=DriverNetcdfUGRID, grid_abstraction='point')
    source_grid = source_rd.get().grid
    self.assertEqual(source_grid.abstraction, 'point')

    destination_grid = self.get_gridxy_global(resolution=20., crs=Spherical())

    chunker = GridChunker(source_grid, destination_grid, (3, 3), check_contains=False, src_grid_resolution=10.,
                          paths=self.fixture_paths, genweights=genweights, use_spatial_decomp=True)
    chunker.write_chunks()

    # Open the source chunk with index 1 and look for the GUID index variable.
    chunk_path = chunker.create_full_path_from_template('src_template', index=1)
    chunk_contents = RequestDataset(chunk_path).get()
    self.assertIn(GridChunkerConstants.IndexFile.NAME_SRCIDX_GUID, chunk_contents)
def chunked_smm(wd, index_path, insert_weighted, destination, data_variables):
    """
    Run ``GridChunker.smm`` for a chunked working directory and optionally insert the weighted results into a
    destination file.

    :param wd: Working directory passed to ``GridChunker.smm``. The current working directory is used when ``None``.
    :param index_path: Path to the chunk index file. When ``None``, the default index file name is looked up inside
     ``wd``. The path must exist.
    :param insert_weighted: If true, call ``GridChunker.insert_weighted`` after the multiplication.
    :param destination: Target passed to ``GridChunker.insert_weighted``. Required when ``insert_weighted`` is true.
    :param str data_variables: Either ``'auto'`` (passed through unchanged) or a comma-separated string that is
     split into a list.
    :raises ValueError: If ``insert_weighted`` is requested without a ``destination``.
    """
    wd = os.getcwd() if wd is None else wd
    if data_variables != 'auto':
        data_variables = data_variables.split(',')
    if index_path is None:
        default_name = constants.GridChunkerConstants.DEFAULT_PATHS['index_file']
        index_path = os.path.join(wd, default_name)

    ocgis.vm.barrier()
    assert os.path.exists(index_path)
    if insert_weighted and destination is None:
        raise ValueError('If --insert_weighted, then "destination" must be provided.')

    GridChunker.smm(index_path, wd, data_variables=data_variables)
    if not insert_weighted:
        return

    # The insertion is scoped to rank 0; the surrounding barriers keep the other ranks in step.
    with ocgis.vm.scoped_barrier(first=True, last=True), ocgis.vm.scoped('insert weighted', [0]):
        if not ocgis.vm.is_null:
            GridChunker.insert_weighted(index_path, wd, destination, data_variables=data_variables)
def test_system_scrip_destination_splitting(self):
    """Chunking against a SCRIP destination grid should write seven files to the output directory."""
    source_grid = create_gridxy_global()
    scrip_grid = self.fixture_driver_scrip_netcdf_field().grid

    chunker = GridChunker(source_grid, scrip_grid, (3,), paths={'wd': self.current_dir_output})
    chunker.write_chunks()

    written_files = os.listdir(self.current_dir_output)
    self.assertEqual(len(written_files), 7)
def test_system_scrip_destination_splitting(self):
    """Chunking against a SCRIP destination grid should write seven files to the output directory."""
    source_grid = create_gridxy_global()
    scrip_grid = self.fixture_driver_scrip_netcdf_field().grid

    chunker = GridChunker(source_grid, scrip_grid, (3,), paths={'wd': self.current_dir_output})
    chunker.write_chunks()

    written_files = os.listdir(self.current_dir_output)
    self.assertEqual(len(written_files), 7)
def test_system_grid_chunking(self):
    """Chunk an ESMF unstructured grid against itself on four ranks and check the chunk and merged weights."""
    if vm.size != 4:
        raise SkipTest('vm.size != 4')

    from ocgis.spatial.grid_chunker import GridChunker

    # The same file acts as source and destination.
    rd_dst = RequestDataset(uri=self.path_esmf_unstruct, driver=DriverESMFUnstruct, crs=Spherical(),
                            grid_abstraction='point', grid_is_isomorphic=True)
    rd_src = deepcopy(rd_dst)
    resolution = 0.28125

    # Only rank 0 creates the chunk directory; the others wait until it exists.
    chunk_wd = os.path.join(self.current_dir_output, 'chunks')
    if vm.rank == 0:
        os.mkdir(chunk_wd)
    vm.barrier()

    chunker = GridChunker(rd_src, rd_dst, nchunks_dst=[8], src_grid_resolution=resolution,
                          dst_grid_resolution=resolution, optimized_bbox_subset=True, paths={'wd': chunk_wd},
                          genweights=True)
    chunker.write_chunks()

    # Distribute the eight chunk counters across ranks so each rank checks its own share.
    dist = OcgDist()
    local_ctr = Dimension(name='ctr', size=8, dist=True)
    dist.add_dimension(local_ctr)
    dist.update_dimension_bounds()

    # Chunk file names are one-based, hence the shifted range.
    for chunk_id in range(local_ctr.bounds_local[0] + 1, local_ctr.bounds_local[1] + 1):
        src_chunk_path = os.path.join(chunk_wd, 'split_src_{}.nc'.format(chunk_id))
        dst_chunk_path = os.path.join(chunk_wd, 'split_dst_{}.nc'.format(chunk_id))
        src_chunk = Field.read(src_chunk_path, driver=DriverESMFUnstruct)
        dst_chunk = Field.read(dst_chunk_path, driver=DriverESMFUnstruct)
        self.assertLessEqual(src_chunk.grid.shape[0] - dst_chunk.grid.shape[0], 150)
        self.assertGreater(src_chunk.grid.shape[0], dst_chunk.grid.shape[0])

        weight_path = os.path.join(chunk_wd, 'esmf_weights_{}.nc'.format(chunk_id))
        factors = Field.read(weight_path)['S'].v()
        self.assertAlmostEqual(factors.min(), 1.0)
        self.assertAlmostEqual(factors.max(), 1.0)

    # The merge is performed by rank 0 only.
    with vm.scoped('merge weights', [0]):
        if not vm.is_null:
            merged_weights = self.get_temporary_file_path('merged_weights.nc')
            chunker.create_merged_weight_file(merged_weights, strict=False)
            merged_factors = Field.read(merged_weights)['S'].v()
            self.assertAlmostEqual(merged_factors.min(), 1.0)
            self.assertAlmostEqual(merged_factors.max(), 1.0)
def test_init(self):
    """Check the defaults GridChunker picks for optimizations, spatial decomposition and ESMF arguments."""
    # Mocked structured grid.
    structured = mock.create_autospec(Grid)
    structured.ndim = 2
    structured.resolution_max = 10
    self.assertIsInstance(structured, Grid)

    # Mocked unstructured grid.
    unstructured = mock.create_autospec(GridUnstruct)
    unstructured.resolution_max = None
    self.assertIsInstance(unstructured, GridUnstruct)

    for mocked_grid in (structured, unstructured):
        mocked_grid._gc_initialize_ = mock.Mock()
        mocked_grid.parent = mock.Mock()

    # Test optimizations are chosen appropriately.
    chunker = GridChunker(unstructured, structured, (3, 4), paths=self.fixture_paths)
    self.assertFalse(chunker.optimized_bbox_subset)
    self.assertTrue(chunker.eager)

    chunker = GridChunker(unstructured, structured, (3, 4), src_grid_resolution=1.0, dst_grid_resolution=2.0,
                          paths=self.fixture_paths)
    self.assertTrue(chunker.optimized_bbox_subset)
    self.assertFalse(chunker.use_spatial_decomp)

    # Test spatial decomposition is chosen appropriately.
    chunker = GridChunker(structured, unstructured)
    self.assertTrue(chunker.use_spatial_decomp)

    # Test ESMF keyword arguments. A mock stands in for the ESMF module.
    fake_esmf = mock.Mock()
    with mock.patch.dict(sys.modules, {'ESMF': fake_esmf}):
        requested_kwargs = {'ignore_degenerate': True}
        chunker = self.fixture_grid_chunker(genweights=True, esmf_kwargs=requested_kwargs)
        self.assertGreaterEqual(len(chunker.esmf_kwargs), 2)
        self.assertTrue(chunker.genweights)
        self.assertTrue(chunker.esmf_kwargs['ignore_degenerate'])
def test_create_merged_weight_file(self):
    """Merged chunk weights for two rectangular grids should be equivalent to a global ESMF weight file."""
    import ESMF

    source_path = self.get_temporary_file_path('src.nc')
    destination_path = self.get_temporary_file_path('dst.nc')

    source_grid = create_gridxy_global(resolution=30.0, wrapped=False, crs=Spherical())
    destination_grid = create_gridxy_global(resolution=35.0, wrapped=False, crs=Spherical())

    # The grids are written to disk because ESMF reads them by filename below.
    source_grid.write(source_path)
    destination_grid.write(destination_path)

    # Split source and destination grids and generate per-chunk weights.
    chunker = GridChunker(source_grid, destination_grid, (2, 2), check_contains=False, allow_masked=True,
                          paths=self.fixture_paths, genweights=True)
    chunker.write_chunks()

    # Merge the per-chunk weight files.
    merged_path = self.get_temporary_file_path('merged_weights.nc')
    chunker.create_merged_weight_file(merged_path)

    # Reference: generate a global weight file using ESMF.
    global_path = self.get_temporary_file_path('global_weights.nc')
    esmf_src = ESMF.Grid(filename=source_path, filetype=ESMF.FileFormat.GRIDSPEC, add_corner_stagger=True)
    esmf_dst = ESMF.Grid(filename=destination_path, filetype=ESMF.FileFormat.GRIDSPEC, add_corner_stagger=True)
    esmf_src_field = ESMF.Field(grid=esmf_src)
    esmf_dst_field = ESMF.Field(grid=esmf_dst)
    regrid_handle = ESMF.Regrid(srcfield=esmf_src_field, dstfield=esmf_dst_field, filename=global_path,
                                regrid_method=ESMF.RegridMethod.CONSERVE)

    # Merged and global weight files must be equivalent.
    self.assertWeightFilesEquivalent(global_path, merged_path)
def test_system_regrid_target_types(self):
    """Test the chunker pulls grids out of request datasets and fields passed as regrid targets."""
    shared_grid = mock.create_autospec(Grid, spec_set=True, instance=True)
    shared_grid.parent = mock.Mock()
    type(shared_grid).ndim = PropertyMock(return_value=2)

    def _make_field_mock():
        # Returns the mocked field and the property mock backing its ``grid`` attribute.
        field = mock.create_autospec(Field, spec_set=True, instance=True)
        grid_property = PropertyMock(return_value=shared_grid)
        type(field).grid = grid_property
        return field, grid_property

    def _make_request_dataset_mock():
        # A request dataset whose ``create_field`` hands back a mocked field.
        rd = mock.create_autospec(RequestDataset, spec_set=True, instance=True)
        rd.create_field = mock.Mock(return_value=_make_field_mock()[0])
        return rd

    # Request datasets: each should have a field created from it exactly once.
    src_rd = _make_request_dataset_mock()
    dst_rd = _make_request_dataset_mock()
    chunker = GridChunker(src_rd, dst_rd, (1, 1))
    for target in (src_rd, dst_rd):
        target.create_field.assert_called_once()
    for found_grid in (chunker.src_grid, chunker.dst_grid):
        self.assertEqual(found_grid, shared_grid)

    # Fields: each grid property should be read exactly once.
    src_field, src_grid_property = _make_field_mock()
    dst_field, dst_grid_property = _make_field_mock()
    chunker = GridChunker(src_field, dst_field, (1, 1))
    for grid_property in (src_grid_property, dst_grid_property):
        grid_property.assert_called_once_with()
    for found_grid in (chunker.src_grid, chunker.dst_grid):
        self.assertEqual(found_grid, shared_grid)
def chunked_smm(wd, index_path, insert_weighted, destination, data_variables):
    """
    Run ``GridChunker.smm`` for a chunked working directory and optionally insert the weighted results into a
    destination file.

    :param wd: Working directory passed to ``GridChunker.smm``. The current working directory is used when ``None``.
    :param index_path: Path to the chunk index file. When ``None``, the default index file name is looked up inside
     ``wd``. The path must exist.
    :param insert_weighted: If true, call ``GridChunker.insert_weighted`` after the multiplication.
    :param destination: Target passed to ``GridChunker.insert_weighted``. Required when ``insert_weighted`` is true.
    :param str data_variables: Either ``'auto'`` (passed through unchanged) or a comma-separated string that is
     split into a list.
    :raises ValueError: If ``insert_weighted`` is requested without a ``destination``.
    """
    wd = os.getcwd() if wd is None else wd
    if data_variables != 'auto':
        data_variables = data_variables.split(',')
    if index_path is None:
        default_name = constants.GridChunkerConstants.DEFAULT_PATHS['index_file']
        index_path = os.path.join(wd, default_name)

    ocgis.vm.barrier()
    assert os.path.exists(index_path)
    if insert_weighted and destination is None:
        raise ValueError('If --insert_weighted, then "destination" must be provided.')

    GridChunker.smm(index_path, wd, data_variables=data_variables)
    if not insert_weighted:
        return

    # The insertion is scoped to rank 0; the surrounding barriers keep the other ranks in step.
    with ocgis.vm.scoped_barrier(first=True, last=True), ocgis.vm.scoped('insert weighted', [0]):
        if not ocgis.vm.is_null:
            GridChunker.insert_weighted(index_path, wd, destination, data_variables=data_variables)
def test_system_grid_chunking(self):
    """Chunk an ESMF unstructured grid against itself on four ranks and check the chunk and merged weights."""
    if vm.size != 4:
        raise SkipTest('vm.size != 4')

    from ocgis.spatial.grid_chunker import GridChunker

    # The same file acts as source and destination.
    rd_dst = RequestDataset(uri=self.path_esmf_unstruct, driver=DriverESMFUnstruct, crs=Spherical(),
                            grid_abstraction='point', grid_is_isomorphic=True)
    rd_src = deepcopy(rd_dst)
    resolution = 0.28125

    # Only rank 0 creates the chunk directory; the others wait until it exists.
    chunk_wd = os.path.join(self.current_dir_output, 'chunks')
    if vm.rank == 0:
        os.mkdir(chunk_wd)
    vm.barrier()

    chunker = GridChunker(rd_src, rd_dst, nchunks_dst=[8], src_grid_resolution=resolution,
                          dst_grid_resolution=resolution, optimized_bbox_subset=True, paths={'wd': chunk_wd},
                          genweights=True)
    chunker.write_chunks()

    # Distribute the eight chunk counters across ranks so each rank checks its own share.
    dist = OcgDist()
    local_ctr = Dimension(name='ctr', size=8, dist=True)
    dist.add_dimension(local_ctr)
    dist.update_dimension_bounds()

    # Chunk file names are one-based, hence the shifted range.
    for chunk_id in range(local_ctr.bounds_local[0] + 1, local_ctr.bounds_local[1] + 1):
        src_chunk_path = os.path.join(chunk_wd, 'split_src_{}.nc'.format(chunk_id))
        dst_chunk_path = os.path.join(chunk_wd, 'split_dst_{}.nc'.format(chunk_id))
        src_chunk = Field.read(src_chunk_path, driver=DriverESMFUnstruct)
        dst_chunk = Field.read(dst_chunk_path, driver=DriverESMFUnstruct)
        self.assertGreater(src_chunk.grid.shape[0], dst_chunk.grid.shape[0])

        weight_path = os.path.join(chunk_wd, 'esmf_weights_{}.nc'.format(chunk_id))
        factors = Field.read(weight_path)['S'].v()
        self.assertAlmostEqual(factors.min(), 1.0)
        self.assertAlmostEqual(factors.max(), 1.0)

    # The merge is performed by rank 0 only.
    with vm.scoped('merge weights', [0]):
        if not vm.is_null:
            merged_weights = self.get_temporary_file_path('merged_weights.nc')
            chunker.create_merged_weight_file(merged_weights, strict=False)
            merged_factors = Field.read(merged_weights)['S'].v()
            self.assertAlmostEqual(merged_factors.min(), 1.0)
            self.assertAlmostEqual(merged_factors.max(), 1.0)
def test_create_merged_weight_file(self):
    """Merged chunk weights for two rectangular grids should be equivalent to a global ESMF weight file."""
    import ESMF

    source_path = self.get_temporary_file_path('src.nc')
    destination_path = self.get_temporary_file_path('dst.nc')

    source_grid = create_gridxy_global(resolution=30.0, wrapped=False, crs=Spherical())
    destination_grid = create_gridxy_global(resolution=35.0, wrapped=False, crs=Spherical())

    # The grids are written to disk because ESMF reads them by filename below.
    source_grid.write(source_path)
    destination_grid.write(destination_path)

    # Split source and destination grids and generate per-chunk weights.
    chunker = GridChunker(source_grid, destination_grid, (2, 2), check_contains=False, allow_masked=True,
                          paths=self.fixture_paths, genweights=True)
    chunker.write_chunks()

    # Merge the per-chunk weight files.
    merged_path = self.get_temporary_file_path('merged_weights.nc')
    chunker.create_merged_weight_file(merged_path)

    # Reference: generate a global weight file using ESMF.
    global_path = self.get_temporary_file_path('global_weights.nc')
    esmf_src = ESMF.Grid(filename=source_path, filetype=ESMF.FileFormat.GRIDSPEC, add_corner_stagger=True)
    esmf_dst = ESMF.Grid(filename=destination_path, filetype=ESMF.FileFormat.GRIDSPEC, add_corner_stagger=True)
    esmf_src_field = ESMF.Field(grid=esmf_src)
    esmf_dst_field = ESMF.Field(grid=esmf_dst)
    regrid_handle = ESMF.Regrid(srcfield=esmf_src_field, dstfield=esmf_dst_field, filename=global_path,
                                regrid_method=ESMF.RegridMethod.CONSERVE)

    # Merged and global weight files must be equivalent.
    self.assertWeightFilesEquivalent(global_path, merged_path)
def fixture_grid_chunker(self, **kwargs):
    """
    Build a :class:`GridChunker` over two unwrapped global grids with bounds, each carrying a data variable.

    :param kwargs: Overrides or additions to the default ``GridChunker`` keyword arguments (``source``,
     ``destination``, ``paths``, ``nchunks_dst``).
    :returns: The configured chunker.
    """
    source_grid = self.get_gridxy_global(wrapped=False, with_bounds=True)
    destination_grid = self.get_gridxy_global(wrapped=False, with_bounds=True, resolution=0.5)
    for target_grid in (source_grid, destination_grid):
        self.add_data_variable_to_grid(target_grid)

    # Caller-supplied keyword arguments take precedence over the defaults.
    chunker_kwargs = dict(source=source_grid, destination=destination_grid, paths=self.fixture_paths,
                          nchunks_dst=(2, 3))
    chunker_kwargs.update(kwargs)
    return GridChunker(**chunker_kwargs)
def test_create_merged_weight_file_unstructured(self):
    """Merged chunk weights for a UGRID source should be equivalent to a global ESMF weight file."""
    import ESMF

    # Source: an isomorphic UGRID file loaded with a point abstraction.
    ugrid_path = self.get_temporary_file_path('ugrid.nc')
    ugrid_resolution = 10.
    self.fixture_regular_ugrid_file(ugrid_path, ugrid_resolution, crs=Spherical())
    source_rd = RequestDataset(ugrid_path, driver=DriverNetcdfUGRID, grid_abstraction='point')
    source_grid = source_rd.get().grid
    self.assertEqual(source_grid.abstraction, 'point')

    # Destination: a logically rectangular grid, written to disk because ESMF reads it by filename below.
    destination_grid = self.get_gridxy_global(resolution=20., crs=Spherical())
    destination_path = self.get_temporary_file_path('dst.nc')
    destination_grid.parent.write(destination_path)

    # Chunk the grids and generate per-chunk weights.
    chunker = GridChunker(source_grid, destination_grid, (3, 3), check_contains=False, src_grid_resolution=10.,
                          paths=self.fixture_paths, genweights=True)
    chunker.write_chunks()

    # Combine the per-chunk weight files into a single file.
    merged_path = self.get_temporary_file_path('merged_weight_file.nc')
    chunker.create_merged_weight_file(merged_path)

    # Reference: let ESMF generate the weights for the full grids in one pass.
    global_path = self.get_temporary_file_path('global_weights.nc')
    esmf_src = ESMF.Mesh(filename=ugrid_path, filetype=ESMF.FileFormat.UGRID,
                         meshname=VariableName.UGRID_HOST_VARIABLE)
    esmf_dst = ESMF.Grid(filename=destination_path, filetype=ESMF.FileFormat.GRIDSPEC, add_corner_stagger=True)
    esmf_src_field = ESMF.Field(grid=esmf_src, meshloc=ESMF.MeshLoc.ELEMENT)
    esmf_dst_field = ESMF.Field(grid=esmf_dst)
    regrid_handle = ESMF.Regrid(srcfield=esmf_src_field, dstfield=esmf_dst_field, filename=global_path,
                                regrid_method=ESMF.RegridMethod.CONSERVE)

    # The merged file and the global file must describe equivalent weights.
    self.assertWeightFilesEquivalent(global_path, merged_path)
def chunked_rwg(source, destination, weight, nchunks_dst, merge, esmf_src_type, esmf_dst_type, genweights,
                esmf_regrid_method, spatial_subset, src_resolution, dst_resolution, buffer_distance, wd, persist,
                eager, ignore_degenerate, data_variables, spatial_subset_path, verbose, loglvl):
    """
    Generate regridding weights using a spatial decomposition of the destination grid, or a spatial subset of the
    source.

    :param source: Source grid target passed to ``_create_request_dataset_``.
    :param destination: Destination grid target passed to ``_create_request_dataset_``.
    :param weight: Output weight file path. Required with ``merge`` (without a spatial subset) and with
     ``genweights`` on a spatial subset. It must not be inside ``wd`` in those cases.
    :param str nchunks_dst: Comma-separated chunk counts for the destination grid (e.g. ``'3,4'``) or ``None``.
    :param merge: If true and no spatial subset is requested, merge the chunk weight files into ``weight``.
    :param esmf_src_type: Forwarded to ``_create_request_dataset_`` for the source.
    :param esmf_dst_type: Forwarded to ``_create_request_dataset_`` for the destination.
    :param genweights: Forwarded to :class:`GridChunker`. Also triggers a direct weight write when chunks are not
     written.
    :param esmf_regrid_method: Stored under ``'regrid_method'`` in the ESMF keyword arguments.
    :param spatial_subset: If true, write a spatial subset of the source instead of splitting the grids.
    :param src_resolution: Forwarded as ``src_grid_resolution`` and as ``src_resmax`` for the spatial subset.
    :param dst_resolution: Forwarded as ``dst_grid_resolution``.
    :param buffer_distance: Forwarded as ``buffer_value``.
    :param wd: Working directory. A temporary one is created when ``None``; otherwise it must not exist yet.
    :param persist: If false, the working directory is removed at the end.
    :param eager: Forwarded to :class:`GridChunker`.
    :param ignore_degenerate: Stored under ``'ignore_degenerate'`` in the ESMF keyword arguments.
    :param str data_variables: Comma-separated variable names for the source request dataset or ``None``.
    :param spatial_subset_path: Output path for the spatial subset. Defaults to ``spatial_subset.nc`` in ``wd``.
    :param verbose: If true, configure the logger to write to the stream at level ``loglvl``.
    :param str loglvl: Name of a :mod:`logging` level attribute (used only when ``verbose``).
    :returns: ``0`` on success.
    :rtype: int
    :raises ValueError: If a required ``weight`` path is missing, ``wd`` already exists, or ``weight`` lies inside
     ``wd``.
    """
    if verbose:
        ocgis_lh.configure(to_stream=True, level=getattr(logging, loglvl))
    ocgis_lh(msg="Starting Chunked Regrid Weight Generation", level=logging.INFO, logger=CRWG_LOG)

    if not ocgis.env.USE_NETCDF4_MPI:
        msg = ('env.USE_NETCDF4_MPI is False. Considerable performance gains are possible if this is True. Is '
               'netCDF4-python built with parallel support?')
        ocgis_lh(msg, level=logging.WARN, logger=CRWG_LOG, force=True)

    if data_variables is not None:
        data_variables = data_variables.split(',')

    if nchunks_dst is not None:
        # Format the chunking decomposition from its string representation. Splitting a string without a comma
        # yields a single-element list, so no special case is needed.
        nchunks_dst = tuple(int(ii) for ii in nchunks_dst.split(','))
    if merge:
        if not spatial_subset and weight is None:
            raise ValueError('"weight" must be a valid path if --merge')
    if spatial_subset and genweights and weight is None:
        raise ValueError('"weight" must be a valid path if --genweights')

    # Make a temporary working directory if one is not provided by the client. Only do this if we are writing
    # subsets and it is not a merge only operation.
    if wd is None:
        if ocgis.vm.rank == 0:
            wd = tempfile.mkdtemp(prefix='ocgis_chunked_rwg_')
        wd = ocgis.vm.bcast(wd)
    else:
        exc = None
        if ocgis.vm.rank == 0:
            # The working directory must not exist to proceed.
            if os.path.exists(wd):
                exc = ValueError("Working directory {} must not exist.".format(wd))
            else:
                # Make the working directory nesting as needed.
                os.makedirs(wd)
        # Share the outcome so every rank raises together.
        exc = ocgis.vm.bcast(exc)
        if exc is not None:
            raise exc

    if merge and not spatial_subset or (spatial_subset and genweights):
        if _is_subdir_(wd, weight):
            raise ValueError(
                'Merge weight file path must not in the working directory. It may get unintentionally deleted with the --no_persist flag.')

    # Create the source and destination request datasets.
    rd_src = _create_request_dataset_(source, esmf_src_type, data_variables=data_variables)
    rd_dst = _create_request_dataset_(destination, esmf_dst_type)

    # Execute a spatial subset if requested.
    paths = None
    if spatial_subset:
        if spatial_subset_path is None:
            spatial_subset_path = os.path.join(wd, 'spatial_subset.nc')
        msg = "Executing spatial subset. Output path is: {}".format(spatial_subset_path)
        ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
        _write_spatial_subset_(rd_src, rd_dst, spatial_subset_path, src_resmax=src_resolution)
    # Only split grids if a spatial subset is not requested.
    else:
        # Update the paths to use for the grid.
        paths = {'wd': wd}

    # Arguments to ESMF regridding.
    esmf_kwargs = {'regrid_method': esmf_regrid_method,
                   'ignore_degenerate': ignore_degenerate}

    # Create the chunked regridding object. This is used for both chunked regridding and a regrid with a spatial
    # subset.
    gs = GridChunker(rd_src, rd_dst, nchunks_dst=nchunks_dst, src_grid_resolution=src_resolution, paths=paths,
                     dst_grid_resolution=dst_resolution, buffer_value=buffer_distance, redistribute=True,
                     genweights=genweights, esmf_kwargs=esmf_kwargs, use_spatial_decomp='auto', eager=eager)

    # Write subsets and generate weights if requested in the grid splitter.
    # TODO: Need a weight only option. If chunks are written, then weights are written...
    if not spatial_subset and nchunks_dst is not None:
        msg = "Starting main chunking loop..."
        ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
        gs.write_chunks()
    else:
        if spatial_subset:
            source = spatial_subset_path
        if genweights:
            msg = "Writing ESMF weights..."
            ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
            gs.write_esmf_weights(source, destination, weight)

    # Create the global weight file. This does not apply to spatial subsets because there will always be one weight
    # file.
    if merge and not spatial_subset:
        # Weight file merge only works in serial.
        exc = None
        with ocgis.vm.scoped('weight file merge', [0]):
            if not ocgis.vm.is_null:
                msg = "Merging chunked weight files to global file. Output global weight file is: {}".format(weight)
                ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
                try:
                    gs.create_merged_weight_file(weight)
                except Exception as e:
                    # Hold the failure until after the collective calls below. Raising here would leave the
                    # other ranks waiting in the gather.
                    exc = e
        excs = ocgis.vm.gather(exc)
        excs = ocgis.vm.bcast(excs)
        for exc in excs:
            if exc is not None:
                raise exc

    ocgis.vm.barrier()

    # Remove the working directory unless the persist flag is provided.
    if not persist:
        if ocgis.vm.rank == 0:
            msg = "Removing working directory since persist is False."
            ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
            shutil.rmtree(wd)
        ocgis.vm.barrier()

    ocgis_lh(msg="Success!", level=logging.INFO, logger=CRWG_LOG)
    return 0
def test_write_esmf_weights(self):
    """Chunk identical source and destination fields, apply the weights, and compare the rebuilt file."""
    # This is the identity test, so the destination field is a copy of the source field.
    global_grid = create_gridxy_global(resolution=3.0, crs=Spherical())

    # Masking is only tested in serial to keep the indexing simple.
    if vm.size == 1:
        grid_mask = global_grid.get_mask(create=True)
        grid_mask[4, 5] = True
        grid_mask[25, 27] = True
        global_grid.set_mask(grid_mask)
        self.assertEqual(global_grid.get_mask().sum(), 2)

    source_field = create_exact_field(global_grid, 'foo', ntime=3)
    destination_field = deepcopy(source_field)

    # Rank 0 chooses the on-disk paths; every rank then receives them.
    if vm.rank == 0:
        master_path = self.get_temporary_file_path('foo.nc')
        src_field_path = self.get_temporary_file_path('src_field.nc')
    else:
        master_path, src_field_path = None, None
    master_path = vm.bcast(master_path)
    src_field_path = vm.bcast(src_field_path)

    # Write the fields to disk for use in global file reconstruction and testing.
    assert not os.path.exists(master_path)
    destination_field.write(master_path)
    source_field.write(src_field_path)

    # Remove the destination data variable to test its creation and filling.
    destination_field.remove_variable('foo')

    # Chunk the fields and generate weights.
    paths = {'wd': self.current_dir_output}
    chunker = GridChunker(source_field, destination_field, nchunks_dst=(2, 2), genweights=True, paths=paths,
                          esmf_kwargs={'regrid_method': 'BILINEAR'})
    chunker.write_chunks()

    # The index file describes how to reconstruct the grid file.
    index_path = os.path.join(self.current_dir_output, chunker.paths['index_file'])

    # Execute the sparse matrix multiplication using weights read from file.
    chunker.smm(index_path, paths['wd'])

    with vm.scoped('index and reconstruct', [0]):
        if not vm.is_null:
            # Reconstruct the global destination file.
            chunker.insert_weighted(index_path, self.current_dir_output, master_path)

            # Actual values come from the rebuilt destination file.
            actual_field = RequestDataset(master_path).create_field()
            actual = actual_field.data_variables[0].mv()

            # Desired values are the original values in the source field file.
            desired_field = RequestDataset(src_field_path).create_field()
            desired = desired_field.data_variables[0].mv()

            if vm.size_global == 1:
                # Masking is only tested in serial.
                self.assertEqual(actual_field.grid.get_mask().sum(), 2)
            else:
                self.assertIsNone(actual_field.grid.get_mask())

            self.assertNumpyAll(actual, desired)
select = np.logical_or(select, initial) return select if __name__ == '__main__': # ------------------------------------------------------------------------------------------------------------------ # Grid splitter implementation resolution = 1. / 111. # resolution = 1. grid = create_gridxy_global(resolution=resolution, wrapped=False, crs=ocgis.crs.Spherical()) field = create_exact_field(grid, 'exact', ntime=3, fill_data_var=False, crs=ocgis.crs.Spherical()) field.write(os.path.join(OUTDIR, 'dst_field_1km.nc')) gs = GridChunker(grid, grid, (10, 10)) ctr = 1 for grid_sub in gs.iter_dst_grid_subsets(): subset_filename = os.path.join(OUTDIR, 'src_subset_{}.nc'.format(ctr)) dst_subset_filename = os.path.join(OUTDIR, 'dst_subset_{}.nc'.format(ctr)) if vm.rank == 0: print 'creating subset:', subset_filename with vm.scoped_by_emptyable('grid subset', grid_sub): if not vm.is_null: extent_global = grid_sub.extent_global if vm.rank == 0: root = vm.rank_global
def run_create_merged_weight_file(self, filemode):
    """
    Compare a merged chunk weight file against a global ESMF weight file for the given weight file mode.

    :param str filemode: Attribute name on ``ESMF.FileMode``. Also forwarded to :class:`GridChunker`.
    """
    import ESMF

    esmf_filemode = getattr(ESMF.FileMode, filemode)
    with_aux = filemode == "WITHAUX"

    source_path = self.get_temporary_file_path('src.nc')
    destination_path = self.get_temporary_file_path('dst.nc')

    source_grid = create_gridxy_global(resolution=30.0, wrapped=False, crs=Spherical())
    destination_grid = create_gridxy_global(resolution=35.0, wrapped=False, crs=Spherical())

    source_grid.write(source_path)
    destination_grid.write(destination_path)

    # Split source and destination grids, reading them back from disk as request datasets.
    source_rd = RequestDataset(source_path, driver='netcdf-cf')
    destination_rd = RequestDataset(destination_path, driver='netcdf-cf')
    chunker = GridChunker(source_rd, destination_rd, (2, 2), check_contains=False, allow_masked=True,
                          paths=self.fixture_paths, genweights=True, filemode=filemode)
    chunker.write_chunks()

    if with_aux:
        # The first chunk weight file is expected to hold more than three variables, the second exactly three.
        first_weights = self.get_temporary_file_path('esmf_weights_1.nc')
        first_contents = RequestDataset(first_weights, driver='netcdf').create_field()
        self.assertGreater(len(first_contents.keys()), 3)
        second_weights = self.get_temporary_file_path('esmf_weights_2.nc')
        second_contents = RequestDataset(second_weights, driver='netcdf').get()
        self.assertEqual(len(second_contents.keys()), 3)

    # Merge weight files and count the variables in the result.
    merged_path = self.get_temporary_file_path('merged_weights.nc')
    chunker.create_merged_weight_file(merged_path)
    merged_contents = RequestDataset(merged_path, driver='netcdf').get()
    nvars = len(merged_contents.keys())
    if with_aux:
        self.assertGreater(nvars, 3)
    else:
        self.assertEqual(nvars, 3)

    # Reference: generate a global weight file using ESMF with the same file mode.
    global_path = self.get_temporary_file_path('global_weights.nc')
    esmf_src = ESMF.Grid(filename=source_path, filetype=ESMF.FileFormat.GRIDSPEC, add_corner_stagger=True)
    esmf_dst = ESMF.Grid(filename=destination_path, filetype=ESMF.FileFormat.GRIDSPEC, add_corner_stagger=True)
    esmf_src_field = ESMF.Field(grid=esmf_src)
    esmf_dst_field = ESMF.Field(grid=esmf_dst)
    regrid_handle = ESMF.Regrid(srcfield=esmf_src_field, dstfield=esmf_dst_field, filename=global_path,
                                regrid_method=ESMF.RegridMethod.CONSERVE, filemode=esmf_filemode,
                                src_file=source_path, dst_file=destination_path,
                                src_file_type=ESMF.FileFormat.GRIDSPEC, dst_file_type=ESMF.FileFormat.GRIDSPEC)

    # Merged and global weight files must be equivalent.
    self.assertWeightFilesEquivalent(global_path, merged_path)
def chunked_rwg(source, destination, weight, nchunks_dst, merge, esmf_src_type, esmf_dst_type, genweights,
                esmf_regrid_method, spatial_subset, src_resolution, dst_resolution, buffer_distance, wd, persist,
                eager, ignore_degenerate):
    """
    Generate regridding weights using a spatial decomposition of the destination grid, or a spatial subset of the
    source.

    :param source: Source grid target passed to ``_create_request_dataset_``.
    :param destination: Destination grid target passed to ``_create_request_dataset_``.
    :param weight: Output weight file path. Required with ``merge`` (without a spatial subset) and with
     ``genweights`` on a spatial subset. It must not be inside ``wd`` in those cases.
    :param str nchunks_dst: Comma-separated chunk counts for the destination grid (e.g. ``'3,4'``) or ``None``.
    :param merge: If true and no spatial subset is requested, merge the chunk weight files into ``weight``.
    :param esmf_src_type: Forwarded to ``_create_request_dataset_`` for the source.
    :param esmf_dst_type: Forwarded to ``_create_request_dataset_`` for the destination.
    :param genweights: Forwarded to :class:`GridChunker`. Also triggers a direct weight write when chunks are not
     written.
    :param esmf_regrid_method: Stored under ``'regrid_method'`` in the ESMF keyword arguments.
    :param spatial_subset: If true, write a spatial subset to ``spatial_subset.nc`` in ``wd`` instead of splitting
     the grids.
    :param src_resolution: Forwarded as ``src_grid_resolution``.
    :param dst_resolution: Forwarded as ``dst_grid_resolution``.
    :param buffer_distance: Forwarded as ``buffer_value``.
    :param wd: Working directory. A temporary one is created when ``None``; otherwise it must not exist yet.
    :param persist: If false, the working directory is removed at the end.
    :param eager: Forwarded to :class:`GridChunker`.
    :param ignore_degenerate: Stored under ``'ignore_degenerate'`` in the ESMF keyword arguments.
    :returns: ``0`` on success.
    :rtype: int
    :raises ValueError: If a required ``weight`` path is missing, ``wd`` already exists, or ``weight`` lies inside
     ``wd``.
    """
    if not ocgis.env.USE_NETCDF4_MPI:
        msg = ('env.USE_NETCDF4_MPI is False. Considerable performance gains are possible if this is True. Is '
               'netCDF4-python built with parallel support?')
        ocgis_lh(msg, level=logging.WARN, logger='ocli.chunked_rwg', force=True)

    if nchunks_dst is not None:
        # Format the chunking decomposition from its string representation. Splitting a string without a comma
        # yields a single-element list, so no special case is needed.
        nchunks_dst = tuple(int(ii) for ii in nchunks_dst.split(','))
    if merge:
        if not spatial_subset and weight is None:
            raise ValueError('"weight" must be a valid path if --merge')
    if spatial_subset and genweights and weight is None:
        raise ValueError('"weight" must be a valid path if --genweights')

    # Make a temporary working directory if one is not provided by the client. Only do this if we are writing
    # subsets and it is not a merge only operation.
    if wd is None:
        if ocgis.vm.rank == 0:
            wd = tempfile.mkdtemp(prefix='ocgis_chunked_rwg_')
        wd = ocgis.vm.bcast(wd)
    else:
        exc = None
        if ocgis.vm.rank == 0:
            # The working directory must not exist to proceed.
            if os.path.exists(wd):
                exc = ValueError("Working directory {} must not exist.".format(wd))
            else:
                # Make the working directory nesting as needed.
                os.makedirs(wd)
        # Broadcast the outcome instead of raising on rank 0 alone. A rank-0-only raise would leave the other
        # ranks blocked in a collective call.
        exc = ocgis.vm.bcast(exc)
        if exc is not None:
            raise exc

    if merge and not spatial_subset or (spatial_subset and genweights):
        if _is_subdir_(wd, weight):
            raise ValueError(
                'Merge weight file path must not in the working directory. It may get unintentionally deleted with the --no_persist flag.')

    # Create the source and destination request datasets.
    rd_src = _create_request_dataset_(source, esmf_src_type)
    rd_dst = _create_request_dataset_(destination, esmf_dst_type)

    # Execute a spatial subset if requested.
    paths = None
    if spatial_subset:
        # TODO: This path should be customizable.
        spatial_subset_path = os.path.join(wd, 'spatial_subset.nc')
        _write_spatial_subset_(rd_src, rd_dst, spatial_subset_path)
    # Only split grids if a spatial subset is not requested.
    else:
        # Update the paths to use for the grid.
        paths = {'wd': wd}

    # Arguments to ESMF regridding.
    esmf_kwargs = {'regrid_method': esmf_regrid_method,
                   'ignore_degenerate': ignore_degenerate}

    # Create the chunked regridding object. This is used for both chunked regridding and a regrid with a spatial
    # subset.
    gs = GridChunker(rd_src, rd_dst, nchunks_dst=nchunks_dst, src_grid_resolution=src_resolution, paths=paths,
                     dst_grid_resolution=dst_resolution, buffer_value=buffer_distance, redistribute=True,
                     genweights=genweights, esmf_kwargs=esmf_kwargs, use_spatial_decomp='auto', eager=eager)

    # Write subsets and generate weights if requested in the grid splitter.
    # TODO: Need a weight only option. If chunks are written, then weights are written...
    if not spatial_subset and nchunks_dst is not None:
        gs.write_chunks()
    else:
        if spatial_subset:
            source = spatial_subset_path
        if genweights:
            gs.write_esmf_weights(source, destination, weight)

    # Create the global weight file. This does not apply to spatial subsets because there will always be one weight
    # file.
    if merge and not spatial_subset:
        # Weight file merge only works in serial.
        exc = None
        with ocgis.vm.scoped('weight file merge', [0]):
            if not ocgis.vm.is_null:
                try:
                    gs.create_merged_weight_file(weight)
                except Exception as e:
                    # Hold the failure until after the collective calls below. Raising here would leave the
                    # other ranks waiting in the gather.
                    exc = e
        excs = ocgis.vm.gather(exc)
        excs = ocgis.vm.bcast(excs)
        for exc in excs:
            if exc is not None:
                raise exc

    ocgis.vm.barrier()

    # Remove the working directory unless the persist flag is provided.
    if not persist:
        if ocgis.vm.rank == 0:
            shutil.rmtree(wd)
        ocgis.vm.barrier()

    return 0
def chunked_rwg(source, destination, weight, nchunks_dst, merge, esmf_src_type, esmf_dst_type, genweights,
                esmf_regrid_method, spatial_subset, src_resolution, dst_resolution, buffer_distance, wd, persist,
                eager, ignore_degenerate, data_variables, spatial_subset_path, verbose, loglvl, weightfilemode):
    """
    Write grid chunks and/or ESMF regridding weights for a source and destination grid, optionally merging chunked
    weight files into a single weight file. If the output weight file exists at the end, provenance attributes
    (user, hostname, history) are appended to it on rank 0. Returns `0` when all steps complete.

    :param source: Source grid input. Passed to ``_create_request_dataset_`` and, when no spatial subset is
     requested, to ``GridChunker.write_esmf_weights``. Presumably a file path (confirm against the caller).
    :param destination: Destination grid input. Used like ``source``.
    :param weight: Path of the output weight file. Required with ``merge`` (when no spatial subset is requested) and
     with ``spatial_subset`` plus ``genweights``.
    :param str nchunks_dst: Comma-separated integers describing the destination chunking decomposition (e.g.
     ``'4,1'``) or ``None``.
    :param merge: If true, no spatial subset is requested and there is more than one chunk, merge the chunked weight
     files into ``weight``.
    :param esmf_src_type: Passed to ``_create_request_dataset_`` for the source.
    :param esmf_dst_type: Passed to ``_create_request_dataset_`` for the destination.
    :param genweights: Passed to ``GridChunker``. Also triggers ``write_esmf_weights`` when chunks are not written.
    :param esmf_regrid_method: Forwarded to ``GridChunker`` as ``esmf_kwargs['regrid_method']``.
    :param spatial_subset: If true, write a spatial subset to ``spatial_subset_path`` and use that file as the source
     when writing weights.
    :param src_resolution: Forwarded to ``GridChunker`` as ``src_grid_resolution`` and to ``_write_spatial_subset_``
     as ``src_resmax``.
    :param dst_resolution: Forwarded to ``GridChunker`` as ``dst_grid_resolution``.
    :param buffer_distance: Forwarded to ``GridChunker`` as ``buffer_value``.
    :param wd: Working directory. Only prepared when chunks or a spatial subset are written: if ``None``, a temporary
     directory is created on rank 0 and broadcast; if provided together with ``nchunks_dst``, it must not exist yet
     and is created on rank 0.
    :param persist: If false, the working directory (when there is one) is removed at the end.
    :param eager: Forwarded to ``GridChunker``.
    :param ignore_degenerate: Forwarded to ``GridChunker`` as ``esmf_kwargs['ignore_degenerate']``.
    :param str data_variables: Comma-separated names passed to the source request dataset or ``None``.
    :param spatial_subset_path: Output path for the spatial subset. Defaults to ``<wd>/spatial_subset.nc``.
    :param verbose: If true, configure ``ocgis_lh`` to log to a stream.
    :param str loglvl: Name of a level attribute on the :mod:`logging` module (e.g. ``'INFO'``). Only read when
     ``verbose`` is true.
    :param weightfilemode: Forwarded to ``GridChunker.write_esmf_weights`` as ``filemode``.
    :rtype: int
    :raises ValueError: If ``weight`` is required but ``None``, if a provided ``wd`` already exists when chunking, or
     if ``weight`` is reported by ``_is_subdir_`` to be inside ``wd``.
    """
    # Used for creating the history string. Captured first, before any other local name is defined.
    the_locals = locals()

    if verbose:
        ocgis_lh.configure(to_stream=True, level=getattr(logging, loglvl))
    ocgis_lh(msg="Starting Chunked Regrid Weight Generation", level=logging.INFO, logger=CRWG_LOG)

    if not ocgis.env.USE_NETCDF4_MPI:
        msg = ('env.USE_NETCDF4_MPI is False. Considerable performance gains are possible if this is True. Is '
               'netCDF4-python built with parallel support?')
        ocgis_lh(msg, level=logging.WARN, logger=CRWG_LOG, force=True)

    if data_variables is not None:
        data_variables = data_variables.split(',')

    if nchunks_dst is not None:
        # Format the chunking decomposition from its string representation. "split" returns a single-element list
        # when there is no comma so no special case is needed.
        nchunks_dst = tuple(int(ii) for ii in nchunks_dst.split(','))

    if merge and not spatial_subset and weight is None:
        raise ValueError('"weight" must be a valid path if --merge')
    if spatial_subset and genweights and weight is None:
        raise ValueError('"weight" must be a valid path if --genweights')

    # Make a temporary working directory if one is not provided by the client. Only do this if we are writing
    # subsets and it is not a merge only operation.
    should_create_wd = (nchunks_dst is None or not all(ii == 1 for ii in nchunks_dst)) or spatial_subset
    if should_create_wd:
        if wd is None:
            if ocgis.vm.rank == 0:
                wd = tempfile.mkdtemp(prefix='ocgis_chunked_rwg_')
            wd = ocgis.vm.bcast(wd)
        else:
            # The check only runs on rank 0. The error is broadcast so every rank raises together.
            exc = None
            if ocgis.vm.rank == 0:
                # The working directory must not exist to proceed.
                if nchunks_dst is not None:
                    if os.path.exists(wd):
                        exc = ValueError("Working directory {} must not exist.".format(wd))
                    else:
                        # Make the working directory nesting as needed.
                        os.makedirs(wd)
            exc = ocgis.vm.bcast(exc)
            if exc is not None:
                raise exc

        if merge and not spatial_subset or (spatial_subset and genweights):
            if _is_subdir_(wd, weight):
                raise ValueError(
                    'Merge weight file path must not in the working directory. It may get unintentionally deleted with the --no_persist flag.'
                )

    # Create the source and destination request datasets.
    rd_src = _create_request_dataset_(source, esmf_src_type, data_variables=data_variables)
    rd_dst = _create_request_dataset_(destination, esmf_dst_type)

    # Execute a spatial subset if requested.
    paths = None
    if spatial_subset:
        if spatial_subset_path is None:
            spatial_subset_path = os.path.join(wd, 'spatial_subset.nc')
        msg = "Executing spatial subset. Output path is: {}".format(spatial_subset_path)
        ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
        _write_spatial_subset_(rd_src, rd_dst, spatial_subset_path, src_resmax=src_resolution)
    # Only split grids if a spatial subset is not requested.
    else:
        # Update the paths to use for the grid.
        paths = {'wd': wd}

    # Arguments to ESMF regridding.
    esmf_kwargs = {'regrid_method': esmf_regrid_method,
                   'ignore_degenerate': ignore_degenerate}

    # Create the chunked regridding object. This is used for both chunked regridding and a regrid with a spatial
    # subset.
    gs = GridChunker(rd_src, rd_dst, nchunks_dst=nchunks_dst, src_grid_resolution=src_resolution, paths=paths,
                     dst_grid_resolution=dst_resolution, buffer_value=buffer_distance, redistribute=True,
                     genweights=genweights, esmf_kwargs=esmf_kwargs, use_spatial_decomp='auto', eager=eager)

    # Write subsets and generate weights if requested in the grid splitter.
    # TODO: Need a weight only option. If chunks are written, then weights are written...
    if not spatial_subset and nchunks_dst is not None and not gs.is_one_chunk:
        msg = "Starting main chunking loop..."
        ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
        gs.write_chunks()
    else:
        if spatial_subset:
            source = spatial_subset_path
        if genweights:
            msg = "Writing ESMF weights..."
            ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
            handle_weight_file_check(weight)
            gs.write_esmf_weights(source, destination, weight, filemode=weightfilemode)

    # Create the global weight file. This does not apply to spatial subsets because there will always be one weight
    # file.
    if merge and not spatial_subset and not gs.is_one_chunk:
        # Weight file merge only works in serial.
        exc = None
        with ocgis.vm.scoped('weight file merge', [0]):
            if not ocgis.vm.is_null:
                try:
                    msg = "Merging chunked weight files to global file. Output global weight file is: {}".format(
                        weight)
                    ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
                    handle_weight_file_check(weight)
                    gs.create_merged_weight_file(weight)
                except Exception as e:
                    # Keep the error so the gather/broadcast below can re-raise it on every rank.
                    exc = e
        excs = ocgis.vm.gather(exc)
        excs = ocgis.vm.bcast(excs)
        for exc in excs:
            if exc is not None:
                raise exc

    ocgis.vm.barrier()

    # Append the history string if there is an output weight file.
    if weight and ocgis.vm.rank == 0:
        if os.path.exists(weight):
            # Add some additional stuff for record keeping
            import getpass
            import socket

            with nc.Dataset(weight, 'a') as ds:
                ds.setncattr('created_by_user', getpass.getuser())
                ds.setncattr('created_on_hostname', socket.getfqdn())
                ds.setncattr('history', create_history_string(the_locals))
    ocgis.vm.barrier()

    # Remove the working directory unless the persist flag is provided.
    if not persist:
        if ocgis.vm.rank == 0:
            # "wd" is still None when it was not supplied and no working directory had to be created above. A
            # supplied directory may also never have been created. There is nothing to remove in either case.
            if wd is not None and os.path.isdir(wd):
                msg = "Removing working directory since persist is False."
                ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
                shutil.rmtree(wd)
        ocgis.vm.barrier()

    ocgis_lh(msg="Success!", level=logging.INFO, logger=CRWG_LOG)
    return 0
def test_write_esmf_weights(self):
    """Test chunked weight generation, sparse matrix multiplication and reconstruction for an identity regrid."""
    # Identity case: the destination field below is a deep copy of the source field.
    grid = create_gridxy_global(resolution=3.0, crs=Spherical())

    # Masking is only exercised in serial, which keeps the mask indexing simple.
    if vm.size == 1:
        grid_mask = grid.get_mask(create=True)
        grid_mask[4, 5] = True
        grid_mask[25, 27] = True
        grid.set_mask(grid_mask)
        self.assertEqual(grid.get_mask().sum(), 2)

    field_src = create_exact_field(grid, 'foo', ntime=3)
    field_dst = deepcopy(field_src)

    # Temporary paths are requested on rank 0 only and then shared with every rank. The files are used for global
    # file reconstruction and for the final comparison.
    global_dst_path = None
    src_copy_path = None
    if vm.rank == 0:
        global_dst_path = self.get_temporary_file_path('foo.nc')
        src_copy_path = self.get_temporary_file_path('src_field.nc')
    global_dst_path = vm.bcast(global_dst_path)
    src_copy_path = vm.bcast(src_copy_path)

    assert not os.path.exists(global_dst_path)
    field_dst.write(global_dst_path)
    field_src.write(src_copy_path)

    # Drop the destination data variable so that its creation and filling are tested.
    field_dst.remove_variable('foo')

    # Chunk the fields and generate weights.
    paths = {'wd': self.current_dir_output}
    chunker = GridChunker(field_src, field_dst, nchunks_dst=(2, 2), genweights=True, paths=paths,
                          esmf_kwargs={'regrid_method': 'BILINEAR'})
    chunker.write_chunks()

    # Path to the index file describing how to reconstruct the grid file.
    index_path = os.path.join(self.current_dir_output, chunker.paths['index_file'])

    # Sparse matrix multiplication using the weights read from file.
    chunker.smm(index_path, paths['wd'])

    with vm.scoped('index and reconstruct', [0]):
        if not vm.is_null:
            # Reconstruct the global destination file.
            chunker.insert_weighted(index_path, self.current_dir_output, global_dst_path)

            # Actual values come from the reconstructed destination file.
            actual_field = RequestDataset(global_dst_path).create_field()
            actual = actual_field.data_variables[0].mv()

            # Desired values are the original source values written earlier.
            desired_field = RequestDataset(src_copy_path).create_field()
            desired = desired_field.data_variables[0].mv()

            # A mask is only expected in serial (see above).
            reconstructed_mask = actual_field.grid.get_mask()
            if vm.size_global == 1:
                self.assertEqual(reconstructed_mask.sum(), 2)
            else:
                self.assertIsNone(reconstructed_mask)

            self.assertNumpyAll(actual, desired)
def test_system_negative_values_in_spherical_grid(self):
    """
    For every source/destination pairing of three spherical grids (one with a negative longitude coordinate),
    assert that weights merged from chunked runs are equivalent to the weights written for the single-chunk case.
    """
    starting_dir = os.getcwd()
    try:
        lon_from_neg10 = Variable("lon", np.arange(-10, 350, step=10, dtype=float), dimensions=["lon"])
        lon_from_zero = Variable("lon", np.arange(0, 360, step=10, dtype=float), dimensions=["lon"])
        lat = Variable("lat", np.arange(-90, 100, step=10, dtype=float), dimensions=["lat"])

        grids = [
            Grid(x=lon_from_neg10.copy(), y=lat.copy(), crs=Spherical()),
            Grid(x=lon_from_zero.copy(), y=lat.copy(), crs=Spherical()),
            create_gridxy_global(5, with_bounds=False, crs=Spherical()),
        ]

        # The last decomposition is a single chunk and produces the global weight file used as the reference.
        decompositions = [(4, 1), (3, 1), (2, 1), (1, 1)]
        src_grid_name = "gridn.nc"
        dst_grid_name = "grid.nc"

        for pair_idx, (src, dst) in enumerate(itertools.product(grids, grids)):
            # Each grid pairing gets its own directory holding the two grid files.
            os.chdir(self.current_dir_output)
            pair_dirname = "grid-ctr-{}".format(pair_idx)
            self.dprint(pair_dirname)
            pair_dir = os.path.join(self.current_dir_output, pair_dirname)
            os.mkdir(pair_dirname)
            os.chdir(pair_dirname)

            src.parent.write(src_grid_name)
            dst.parent.write(dst_grid_name)

            for chunk_idx, decomposition in enumerate(decompositions):
                # Each decomposition runs from inside its own working directory.
                os.chdir(pair_dir)
                chunk_dirname = 'ctr-{}'.format(chunk_idx)
                os.mkdir(chunk_dirname)
                os.chdir(chunk_dirname)
                working_dir = os.getcwd()
                self.dprint("current chunks", decomposition)

                chunker = GridChunker(src, dst, nchunks_dst=decomposition, genweights=True,
                                      paths={'wd': working_dir}, esmf_kwargs={'regrid_method': 'BILINEAR'})
                if chunker.is_one_chunk:
                    chunker.write_esmf_weights(os.path.join(pair_dir, src_grid_name),
                                               os.path.join(pair_dir, dst_grid_name),
                                               os.path.join(pair_dir, "global-weights.nc"))
                else:
                    chunker.write_chunks()
                    chunker.create_merged_weight_file(
                        os.path.join(pair_dir, chunk_dirname, "merged-weights.nc"))

            os.chdir(pair_dir)

            # Compare the merged weights of every chunked run against the global weights.
            global_weights = os.path.join(pair_dir, "global-weights.nc")
            for chunk_idx in range(len(decompositions) - 1):
                merged_weights = os.path.join(pair_dir, "ctr-{}".format(chunk_idx), "merged-weights.nc")
                self.assertWeightFilesEquivalent(merged_weights, global_weights)
    finally:
        # Always restore the working directory the test started in.
        os.chdir(starting_dir)