def ugrid_corner_plotting(path, show=True):
    # path = '/home/benkoziol/htmp/ugrid_splits/src_subset_1.nc'
    # path = '/home/benkoziol/l/data/ocgis/ugrid-cesm-subsetting/UGRID_1km-merge-10min_HYDRO1K-merge-nomask_c130402.nc'
    rd = RequestDataset(path)
    vc = rd.get_variable_collection()
    vc.load()

    face_nodes = vc['landmesh_face_node'].get_value()
    face_node_x = vc['landmesh_node_x'].get_value()
    face_node_y = vc['landmesh_node_y'].get_value()

    for ctr, idx in enumerate(range(face_nodes.shape[0])):
        if ctr % 1000 == 0:
            print('{} of {}'.format(ctr, face_nodes.shape[0]))

        curr_face_indices = face_nodes[idx, :]
        curr_face_node_x = face_node_x[curr_face_indices]
        curr_face_node_y = face_node_y[curr_face_indices]
        face_coords = np.zeros((4, 2))
        face_coords[:, 0] = curr_face_node_x
        face_coords[:, 1] = curr_face_node_y

        plt.scatter(face_coords[:, 0], face_coords[:, 1], marker='o', color='b')

    if show:
        plt.show()

def main():
    rd = RequestDataset(IN_PATH, driver=DriverNetcdfUGRID, grid_abstraction=GridAbstraction.POINT)
    field = rd.get()
    foo = '/tmp/foo.nc'
    # assert field.grid.cindex is not None
    # print field.grid.archetype
    # tkk
    print(field.shapes)
    sub = field.grid.get_intersects(box(*BBOX), optimized_bbox_subset=True).parent
    with vm.scoped_by_emptyable('reduce global', sub):
        if not vm.is_null:
            sub.grid_abstraction = GridAbstraction.POLYGON
            # rank_print('sub.grid.abstraction', sub.grid.abstraction)
            # rank_print('sub.grid._abstraction', sub.grid._abstraction)
            # rank_print('archetype', sub.grid.archetype)
            # rank_print(sub.grid.extent)
            rank_print('sub', sub.grid.cindex.get_value())
            subr = sub.grid.reduce_global().parent
            rank_print('subr', subr.grid.cindex.get_value())
            # rank_print(subr.x.name)
            # rank_print(subr.x.get_value().min())
            rank_print(subr.grid.extent)
            # rank_print(subr.grid.cindex.get_value())
            # rank_print(subr.shapes)
            # subr.write(foo)
            # if vm.rank == 0:
            #     RequestDataset(foo).inspect()
    vm.barrier()

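# A minimal sketch of the subset-then-reduce pattern main() above exercises,
# without the debug printing. RequestDataset, DriverNetcdfUGRID, GridAbstraction,
# box, and vm are the same names used there; subset_and_reduce and its arguments
# are hypothetical. This assumes a UGRID source whose coordinate index (cindex)
# references more nodes than the subset touches, so reduce_global() compacts the
# node arrays to only those actually used.
def subset_and_reduce(in_path, bbox, out_path):
    rd = RequestDataset(in_path, driver=DriverNetcdfUGRID, grid_abstraction=GridAbstraction.POINT)
    field = rd.get()
    sub = field.grid.get_intersects(box(*bbox), optimized_bbox_subset=True).parent
    with vm.scoped_by_emptyable('reduce', sub):
        if not vm.is_null:
            # Drop unused nodes and renumber the face-node connectivity.
            reduced = sub.grid.reduce_global().parent
            reduced.write(out_path)
    vm.barrier()
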
def check_spatial_overlap():
    BLUE = '#6699cc'
    GRAY = '#999999'

    src_file = '/home/benkoziol/htmp/src_subset_57.nc'
    dst_file = '/home/benkoziol/htmp/dst_subset_57.nc'

    vc = RequestDataset(src_file).get_variable_collection()
    face_node_x = vc['landmesh_node_x'].get_value()
    face_node_y = vc['landmesh_node_y'].get_value()
    minx, maxx = face_node_x.min(), face_node_x.max()
    miny, maxy = face_node_y.min(), face_node_y.max()
    src_box = shapely.geometry.box(minx, miny, maxx, maxy)

    field = RequestDataset(dst_file).get()
    dst_box = field.grid.envelope

    print('overlap={}'.format(src_box.intersects(dst_box)))

    src_patch = PolygonPatch(src_box, fc=BLUE, ec=BLUE, alpha=0.5, zorder=2)
    dst_patch = PolygonPatch(dst_box, fc=GRAY, ec=GRAY, alpha=0.5, zorder=1)
    fig = plt.figure(num=1)
    ax = fig.add_subplot(111)
    ax.add_patch(src_patch)
    ax.add_patch(dst_patch)

    minx, miny, maxx, maxy = dst_box.bounds
    w, h = maxx - minx, maxy - miny
    ax.set_xlim(minx - 0.2 * w, maxx + 0.2 * w)
    ax.set_ylim(miny - 0.2 * h, maxy + 0.2 * h)
    ax.set_aspect(1)

    plt.show()

def ugrid_corner_plotting(path, show=True):
    # path = '/home/benkoziol/htmp/ugrid_splits/src_subset_1.nc'
    # path = '/home/benkoziol/l/data/ocgis/ugrid-cesm-subsetting/UGRID_1km-merge-10min_HYDRO1K-merge-nomask_c130402.nc'
    rd = RequestDataset(path)
    vc = rd.get_raw_field()
    vc.load()

    face_nodes = vc['landmesh_face_node'].get_value()
    face_node_x = vc['landmesh_node_x'].get_value()
    face_node_y = vc['landmesh_node_y'].get_value()

    for ctr, idx in enumerate(range(face_nodes.shape[0])):
        if ctr % 1000 == 0:
            print('{} of {}'.format(ctr, face_nodes.shape[0]))

        curr_face_indices = face_nodes[idx, :]
        curr_face_node_x = face_node_x[curr_face_indices]
        curr_face_node_y = face_node_y[curr_face_indices]
        face_coords = np.zeros((4, 2))
        face_coords[:, 0] = curr_face_node_x
        face_coords[:, 1] = curr_face_node_y

        plt.scatter(face_coords[:, 0], face_coords[:, 1], marker='o', color='b')

    if show:
        plt.show()

def plot_centers(path):
    rd = RequestDataset(path)
    vc = rd.get_variable_collection()

    x = vc[FACE_CENTER_X].get_value()  # [::100]
    y = vc[FACE_CENTER_Y].get_value()  # [::100]

    plt.scatter(x, y, marker='o', color='b')

def plot_centers(path):
    rd = RequestDataset(path)
    vc = rd.get_raw_field()

    x = vc[FACE_CENTER_X].get_value()  # [::100]
    y = vc[FACE_CENTER_Y].get_value()  # [::100]

    plt.scatter(x, y, marker='o', color='b')

def test_system_writing_to_netcdf(self):
    """Test coordinate system is retrievable from netCDF format."""

    path = self.get_temporary_file_path('crs.nc')
    with nc.Dataset(path, 'w') as rootgrp:
        WGS84().write_to_rootgrp(rootgrp)
    rd = RequestDataset(path)
    infield = rd.get()
    actual = infield.first()
    self.assertEqual(actual, WGS84())
    actual_crs = AbstractProj4CRS.load_from_metadata(rd.metadata)
    self.assertEqual(actual_crs, WGS84())

def analyze_weights():
    folder = '/home/benkoziol/htmp/esmf_weights_full_20170628'
    for f in os.listdir(folder):
        if f.startswith('esmf_weights'):
            f = os.path.join(folder, f)
            print(f)
            rd = RequestDataset(f)
            vc = rd.get_variable_collection()
            weights = vc['S'].get_value()
            wmin, wmax = weights.min(), weights.max()
            if wmin < 0:
                raise ValueError('min less than 0: {}'.format(f))
            if wmax > 1.0 + 1e-6:
                raise ValueError('max greater than 1 ({}): {}'.format(wmax, f))

def analyze_weights():
    folder = '/home/benkoziol/htmp/esmf_weights_full_20170628'
    for f in os.listdir(folder):
        if f.startswith('esmf_weights'):
            f = os.path.join(folder, f)
            print(f)
            rd = RequestDataset(f)
            vc = rd.get_raw_field()
            weights = vc['S'].get_value()
            wmin, wmax = weights.min(), weights.max()
            if wmin < 0:
                raise ValueError('min less than 0: {}'.format(f))
            if wmax > 1.0 + 1e-6:
                raise ValueError('max greater than 1 ({}): {}'.format(wmax, f))

def resolution():
    rd = RequestDataset(IN_PATH)
    vc = rd.get_variable_collection()

    x = vc[FACE_CENTER_X].get_value()
    x = np.sort(x)
    y = vc[FACE_CENTER_Y].get_value()
    y = np.sort(y)

    dx = np.diff(x)
    sx = {dx.min(), dx.mean(), dx.max()}
    dy = np.diff(y)
    sy = {dy.min(), dy.mean(), dy.max()}

    print(sx, sy)

def fixture(self, **kwargs):
    path = self.get_temporary_file_path('__testdriverugrid__.nc')
    u = get_ugrid_data_structure()
    u.write(path)
    rd = RequestDataset(path, driver=DriverNetcdfUGRID, **kwargs)
    du = DriverNetcdfUGRID(rd)
    return du

def resolution():
    rd = RequestDataset(IN_PATH)
    vc = rd.get_raw_field()

    x = vc[FACE_CENTER_X].get_value()
    x = np.sort(x)
    y = vc[FACE_CENTER_Y].get_value()
    y = np.sort(y)

    dx = np.diff(x)
    sx = {dx.min(), dx.mean(), dx.max()}
    dy = np.diff(y)
    sy = {dy.min(), dy.mean(), dy.max()}

    print(sx, sy)

def plot_subset():
    # src_file = '/home/benkoziol/l/data/ocgis/ugrid-cesm-subsetting/UGRID_1km-merge-10min_HYDRO1K-merge-nomask_c130402.nc'
    src_file = '/home/benkoziol/htmp/src_subset_57.nc'
    dst_file = '/home/benkoziol/htmp/dst_subset_57.nc'

    # the_plt = plot_centers(src_file)
    ugrid_corner_plotting(src_file, show=False)

    rd = RequestDataset(dst_file)
    vc = rd.get()
    x = vc['x'].get_value()
    y = vc['y'].get_value()
    x, y = np.meshgrid(x, y)
    x = x.flatten()
    y = y.flatten()
    plt.scatter(x, y, marker='x', color='r')

    plt.show()

def test_system_get_field_from_file(self):
    """Test returning a distributed field from file."""

    field = self.get_field(nrow=5, ncol=7)
    if MPI_RANK == 0:
        path = self.get_temporary_file_path('data.nc')
    else:
        path = None
    path = MPI_COMM.bcast(path)

    with vm.scoped('write test field', [0]):
        if MPI_RANK == 0:
            field.write(path)
    MPI_COMM.Barrier()

    rd = RequestDataset(path)
    out_field = rd.get()

    if MPI_SIZE == 8:
        self.assertEqual(vm.size, 8)

    if MPI_RANK == 0:
        path2 = self.get_temporary_file_path('out_field.nc')
    else:
        path2 = None
    path2 = MPI_COMM.bcast(path2)

    with vm.scoped_by_emptyable('out_field write', out_field):
        if not vm.is_null:
            out_field.write(path2)
    MPI_COMM.Barrier()

    with vm.scoped('get actual', [0]):
        if MPI_RANK == 0:
            actual = RequestDataset(path2).get()
            actual = actual.data_variables[0].get_value().sum()
        else:
            actual = None

    actual = MPI_COMM.bcast(actual)
    desired = field.data_variables[0].get_value().sum()
    self.assertAlmostEqual(actual, desired)

def insert_weighted(index_path, dst_wd, dst_master_path):
    """
    Insert weighted destination variable data into the master destination file.

    :param str index_path: Path to the split index netCDF file.
    :param str dst_wd: Working directory containing the destination files holding the weighted data.
    :param str dst_master_path: Path to the destination master weight file.
    """

    index_field = RequestDataset(index_path).get()
    gs_index_v = index_field[GridChunkerConstants.IndexFile.NAME_INDEX_VARIABLE]
    dst_filenames = gs_index_v.attrs[GridChunkerConstants.IndexFile.NAME_DESTINATION_VARIABLE]
    dst_filenames = index_field[dst_filenames]

    y_bounds = GridChunkerConstants.IndexFile.NAME_Y_DST_BOUNDS_VARIABLE
    y_bounds = gs_index_v.attrs[y_bounds]
    y_bounds = index_field[y_bounds].get_value()

    x_bounds = GridChunkerConstants.IndexFile.NAME_X_DST_BOUNDS_VARIABLE
    x_bounds = gs_index_v.attrs[x_bounds]
    x_bounds = index_field[x_bounds].get_value()

    joined = dst_filenames.join_string_value()
    dst_master_field = RequestDataset(dst_master_path).get()
    for data_variable in dst_master_field.data_variables:
        assert data_variable.ndim == 3
        assert not data_variable.has_allocated_value
        for time_index in range(dst_master_field.time.shape[0]):
            for vidx, source_path in enumerate(joined):
                source_path = os.path.join(dst_wd, source_path)
                slc = {dst_master_field.time.dimensions[0].name: time_index,
                       dst_master_field.y.dimensions[0].name: slice(None),
                       dst_master_field.x.dimensions[0].name: slice(None)}
                source_data = RequestDataset(source_path).get()[data_variable.name][slc]
                assert not source_data.has_allocated_value
                with nc.Dataset(dst_master_path, 'a') as ds:
                    ds.variables[data_variable.name][time_index,
                                                     y_bounds[vidx][0]:y_bounds[vidx][1],
                                                     x_bounds[vidx][0]:x_bounds[vidx][1]] = source_data.get_value()

def test_system_grid_chunking(self):
    if vm.size != 4:
        raise SkipTest('vm.size != 4')

    from ocgis.spatial.grid_chunker import GridChunker

    path = self.path_esmf_unstruct
    rd_dst = RequestDataset(uri=path, driver=DriverESMFUnstruct, crs=Spherical(),
                            grid_abstraction='point', grid_is_isomorphic=True)
    rd_src = deepcopy(rd_dst)
    resolution = 0.28125
    chunk_wd = os.path.join(self.current_dir_output, 'chunks')
    if vm.rank == 0:
        os.mkdir(chunk_wd)
    vm.barrier()
    paths = {'wd': chunk_wd}
    gc = GridChunker(rd_src, rd_dst, nchunks_dst=[8], src_grid_resolution=resolution,
                     dst_grid_resolution=resolution, optimized_bbox_subset=True, paths=paths,
                     genweights=True)
    gc.write_chunks()

    dist = OcgDist()
    local_ctr = Dimension(name='ctr', size=8, dist=True)
    dist.add_dimension(local_ctr)
    dist.update_dimension_bounds()
    for ctr in range(local_ctr.bounds_local[0], local_ctr.bounds_local[1]):
        ctr += 1
        s = os.path.join(chunk_wd, 'split_src_{}.nc'.format(ctr))
        d = os.path.join(chunk_wd, 'split_dst_{}.nc'.format(ctr))
        sf = Field.read(s, driver=DriverESMFUnstruct)
        df = Field.read(d, driver=DriverESMFUnstruct)
        self.assertGreater(sf.grid.shape[0], df.grid.shape[0])

        wgt = os.path.join(chunk_wd, 'esmf_weights_{}.nc'.format(ctr))
        f = Field.read(wgt)
        S = f['S'].v()
        self.assertAlmostEqual(S.min(), 1.0)
        self.assertAlmostEqual(S.max(), 1.0)

    with vm.scoped('merge weights', [0]):
        if not vm.is_null:
            merged_weights = self.get_temporary_file_path('merged_weights.nc')
            gc.create_merged_weight_file(merged_weights, strict=False)
            f = Field.read(merged_weights)
            S = f['S'].v()
            self.assertAlmostEqual(S.min(), 1.0)
            self.assertAlmostEqual(S.max(), 1.0)

def test_system_converting_state_boundaries_shapefile_memory(self):
    """Test iteration may be used in place of loading all values from source."""

    rd = RequestDataset(uri=self.path_state_boundaries)
    field = rd.get()
    data_variable_names = get_variable_names(field.data_variables)
    field.geom.protected = True
    sub = field.get_field_slice({'geom': slice(10, 20)})
    self.assertTrue(sub.geom.protected)
    self.assertFalse(sub.geom.has_allocated_value)
    self.assertIsInstance(sub, Field)
    self.assertIsInstance(sub.geom, GeometryVariable)

    gc = sub.geom.convert_to(use_geometry_iterator=True)
    self.assertIsInstance(gc, PolygonGC)

    self.assertFalse(sub.geom.has_allocated_value)
    self.assertTrue(field.geom.protected)
    path = self.get_temporary_file_path('out.nc')
    gc.parent.write(path)

def ugrid_area():
    # path = '/home/benkoziol/htmp/src_subset_1.nc'
    path = '/home/benkoziol/l/data/ocgis/ugrid-cesm-subsetting/UGRID_1km-merge-10min_HYDRO1K-merge-nomask_c130402.nc'
    rd = RequestDataset(path)
    vc = rd.get_raw_field()
    vc.load()

    face_nodes = vc['landmesh_face_node'].get_value()
    face_node_x = vc['landmesh_node_x'].get_value()
    face_node_y = vc['landmesh_node_y'].get_value()
    face_center_x = vc['landmesh_face_x'].get_value()
    face_center_y = vc['landmesh_face_y'].get_value()

    areas = []
    for ctr, idx in enumerate(range(face_nodes.shape[0])):
        if ctr % 10000 == 0:
            print('{} of {}'.format(ctr, face_nodes.shape[0]))

        curr_face_indices = face_nodes[idx, :]
        curr_face_node_x = face_node_x[curr_face_indices]
        curr_face_node_y = face_node_y[curr_face_indices]
        face_coords = np.zeros((4, 2))
        face_coords[:, 0] = curr_face_node_x
        face_coords[:, 1] = curr_face_node_y
        poly = Polygon(face_coords)
        parea = poly.area
        poly = shapely.geometry.box(*poly.bounds)

        pt = Point(face_center_x[idx], face_center_y[idx])
        if not poly.intersects(pt):
            print(idx, np.array(pt), poly.bounds)

        # if parea > 1:
        #     print idx
        #     print face_nodes[idx, :]
        #     print face_coords
        #     print poly.bounds
        #     sys.exit()

        areas.append(parea)

def ugrid_area():
    # path = '/home/benkoziol/htmp/src_subset_1.nc'
    path = '/home/benkoziol/l/data/ocgis/ugrid-cesm-subsetting/UGRID_1km-merge-10min_HYDRO1K-merge-nomask_c130402.nc'
    rd = RequestDataset(path)
    vc = rd.get_variable_collection()
    vc.load()

    face_nodes = vc['landmesh_face_node'].get_value()
    face_node_x = vc['landmesh_node_x'].get_value()
    face_node_y = vc['landmesh_node_y'].get_value()
    face_center_x = vc['landmesh_face_x'].get_value()
    face_center_y = vc['landmesh_face_y'].get_value()

    areas = []
    for ctr, idx in enumerate(range(face_nodes.shape[0])):
        if ctr % 10000 == 0:
            print('{} of {}'.format(ctr, face_nodes.shape[0]))

        curr_face_indices = face_nodes[idx, :]
        curr_face_node_x = face_node_x[curr_face_indices]
        curr_face_node_y = face_node_y[curr_face_indices]
        face_coords = np.zeros((4, 2))
        face_coords[:, 0] = curr_face_node_x
        face_coords[:, 1] = curr_face_node_y
        poly = Polygon(face_coords)
        parea = poly.area
        poly = shapely.geometry.box(*poly.bounds)

        pt = Point(face_center_x[idx], face_center_y[idx])
        if not poly.intersects(pt):
            print(idx, np.array(pt), poly.bounds)

        # if parea > 1:
        #     print idx
        #     print face_nodes[idx, :]
        #     print face_coords
        #     print poly.bounds
        #     sys.exit()

        areas.append(parea)

def _gc_remap_weight_variable_(self, ii, wvn, odata, src_indices, dst_indices, ifile, gidx,
                               split_grids_directory=None):
    if wvn == 'S':
        pass
    else:
        ifc = GridChunkerConstants.IndexFile
        if wvn == 'row':
            is_unstruct = isinstance(self.dst_grid, GridUnstruct)
            if is_unstruct:
                dst_filename = ifile[gidx[ifc.NAME_DESTINATION_VARIABLE]].join_string_value()[ii]
                dst_filename = os.path.join(split_grids_directory, dst_filename)
                oindices = RequestDataset(dst_filename).get()[ifc.NAME_DSTIDX_GUID].get_value()
            else:
                y_bounds = ifile[gidx[ifc.NAME_Y_DST_BOUNDS_VARIABLE]].get_value()
                x_bounds = ifile[gidx[ifc.NAME_X_DST_BOUNDS_VARIABLE]].get_value()
                indices = dst_indices
        elif wvn == 'col':
            is_unstruct = isinstance(self.src_grid, GridUnstruct)
            if is_unstruct:
                src_filename = ifile[gidx[ifc.NAME_SOURCE_VARIABLE]].join_string_value()[ii]
                src_filename = os.path.join(split_grids_directory, src_filename)
                oindices = RequestDataset(src_filename).get()[ifc.NAME_SRCIDX_GUID].get_value()
            else:
                y_bounds = ifile[gidx[ifc.NAME_Y_SRC_BOUNDS_VARIABLE]].get_value()
                x_bounds = ifile[gidx[ifc.NAME_X_SRC_BOUNDS_VARIABLE]].get_value()
                indices = src_indices
        else:
            raise NotImplementedError

        if not is_unstruct:
            islice = tuple([slice(y_bounds[ii][0], y_bounds[ii][1]),
                            slice(x_bounds[ii][0], x_bounds[ii][1])])
            oindices = indices[islice]
            oindices = oindices.flatten()

        odata = oindices[odata - 1]

    return odata

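# A toy illustration of the remap applied above (a sketch, not part of the class).
# ESMF weight files store 1-based 'row'/'col' indices that are local to a chunk;
# 'oindices' holds the chunk's global index for each local element, so
# 'oindices[odata - 1]' translates local, 1-based indices to global ones.
# The arrays below are invented for demonstration.
import numpy as np

oindices = np.array([101, 205, 309, 410])  # global index of local elements 1..4
odata = np.array([2, 2, 4, 1])             # 1-based local indices from a weight file
print(oindices[odata - 1])                 # -> [205 205 410 101]
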
def test_system_spatial_subsetting(self):
    """Test spatial subsetting ESMF Unstructured format."""

    bbox = shapely.geometry.box(*[-119.2, 61.7, -113.2, 62.7])
    gvar = GeometryVariable(name='geom', value=bbox, is_bbox=True, dimensions='ngeom', crs=Spherical())
    gvar.unwrap()
    rd = RequestDataset(uri=self.path_esmf_unstruct, driver=DriverESMFUnstruct, crs=Spherical(),
                        grid_abstraction='point', grid_is_isomorphic=True)
    field = rd.create_field()
    sub, slc = field.grid.get_intersects(gvar, optimized_bbox_subset=True, return_slice=True)
    desired_extent = np.array((240.890625, 61.8046875, 246.796875, 62.6484375))
    self.assertGreaterEqual(len(vm.get_live_ranks_from_object(sub)), 1)
    with vm.scoped_by_emptyable('reduction', sub):
        if not vm.is_null:
            red = sub.reduce_global()
            self.assertNumpyAllClose(desired_extent, np.array(red.extent_global))

    path = self.get_temporary_file_path('foo.nc', collective=True)
    with vm.scoped_by_emptyable('write', sub):
        if not vm.is_null:
            red.parent.write(path)

def test_system_converting_state_boundaries_shapefile_memory(self):
    """Test iteration may be used in place of loading all values from source."""

    rd = RequestDataset(uri=self.path_state_boundaries)
    field = rd.get()
    data_variable_names = get_variable_names(field.data_variables)
    field.geom.protected = True
    sub = field.get_field_slice({'geom': slice(10, 20)})
    self.assertTrue(sub.geom.protected)
    self.assertFalse(sub.geom.has_allocated_value)
    self.assertIsInstance(sub, Field)
    self.assertIsInstance(sub.geom, GeometryVariable)

    gc = sub.geom.convert_to(use_geometry_iterator=True)
    self.assertIsInstance(gc, PolygonGC)

    # Test the new object does not share data with the source.
    for dn in data_variable_names:
        self.assertNotIn(dn, gc.parent)

    self.assertFalse(sub.geom.has_allocated_value)
    self.assertTrue(field.geom.protected)
    path = self.get_temporary_file_path('out.nc')
    gc.parent.write(path)

def test_write(self):
    du = self.fixture()
    field = du.create_field()
    path = self.get_temporary_file_path('foo.nc')
    field.write(path)

    actual = RequestDataset(path, driver=DriverNetcdfUGRID).get()
    attr_host = actual.dimension_map.get_variable(DMK.ATTRIBUTE_HOST, parent=actual)
    actual.remove_variable(attr_host)
    actual.dimension_map.set_variable(DMK.ATTRIBUTE_HOST, None)
    self.assertNotIn(attr_host.name, actual)
    res = actual.driver.create_host_attribute_variable(actual.dimension_map)
    self.assertEqual(res.attrs, attr_host.attrs)

    path2 = self.get_temporary_file_path('foo2.nc')
    actual.write(path2)
    actual2 = RequestDataset(path2, driver=DriverNetcdfUGRID).get()
    attr_host = actual2.dimension_map.get_variable(DMK.ATTRIBUTE_HOST)
    self.assertIsNotNone(attr_host)

def fixture_esmf_unstruct_field(self):
    rd = RequestDataset(metadata=self.metadata_esmf_unstruct, driver=DriverESMFUnstruct)
    return rd.create_field()

def test_system_converting_state_boundaries_shapefile(self):
    ocgis.env.USE_NETCDF4_MPI = False  # tdk:FIX: this hangs in the STATE_FIPS write for asynch; might be an nc4 bug

    keywords = {'transform_to_crs': [None, Spherical],
                'use_geometry_iterator': [False, True]}
    actual_xsums = []
    actual_ysums = []
    for k in self.iter_product_keywords(keywords):
        if k.use_geometry_iterator and k.transform_to_crs is not None:
            to_crs = k.transform_to_crs()
        else:
            to_crs = None
        if k.transform_to_crs is None:
            desired_crs = WGS84()
        else:
            desired_crs = k.transform_to_crs()

        rd = RequestDataset(uri=self.path_state_boundaries)
        rd.metadata['schema']['geometry'] = 'MultiPolygon'
        field = rd.get()

        # Test there is no mask present.
        field.geom.load()
        self.assertFalse(field.geom.has_mask)
        self.assertNotIn(VariableName.SPATIAL_MASK, field)
        self.assertIsNone(field.dimension_map.get_spatial_mask())

        self.assertEqual(field.crs, WGS84())
        if k.transform_to_crs is not None:
            field.update_crs(desired_crs)

        try:
            gc = field.geom.convert_to(pack=False, use_geometry_iterator=k.use_geometry_iterator, to_crs=to_crs)
        except ValueError as e:
            try:
                self.assertFalse(k.use_geometry_iterator)
                self.assertIsNotNone(to_crs)
            except AssertionError:
                raise e
            else:
                continue

        actual_xsums.append(gc.x.get_value().sum())
        actual_ysums.append(gc.y.get_value().sum())
        self.assertEqual(gc.crs, desired_crs)

        # Test there is no mask present after conversion to geometry coordinates.
        self.assertFalse(gc.has_mask)
        self.assertNotIn(VariableName.SPATIAL_MASK, gc.parent)
        self.assertIsNone(gc.dimension_map.get_spatial_mask())

        for v in list(field.values()):
            if v.name != field.geom.name:
                gc.parent.add_variable(v.extract(), force=True)

        path = self.get_temporary_file_path('esmf_state_boundaries.nc')
        self.assertEqual(gc.parent.crs, desired_crs)
        gc.parent.write(path, driver=DriverKey.NETCDF_ESMF_UNSTRUCT)

        gathered_geoms = vm.gather(field.geom.get_value())
        if vm.rank == 0:
            actual_geoms = []
            for g in gathered_geoms:
                actual_geoms.extend(g)

            rd = RequestDataset(path, driver=DriverKey.NETCDF_ESMF_UNSTRUCT)
            infield = rd.get()
            self.assertEqual(create_crs(infield.crs.value), desired_crs)

            for dv in field.data_variables:
                self.assertIn(dv.name, infield)

            ingrid = infield.grid
            self.assertIsInstance(ingrid, GridUnstruct)

            for g in ingrid.archetype.iter_geometries():
                self.assertPolygonSimilar(g[1], actual_geoms[g[0]], check_type=False)

        vm.barrier()

        # Test coordinates have actually changed.
        if not k.use_geometry_iterator:
            for ctr, to_test in enumerate([actual_xsums, actual_ysums]):
                for lhs, rhs in itertools.combinations(to_test, 2):
                    if ctr == 0:
                        self.assertAlmostEqual(lhs, rhs)
                    else:
                        self.assertNotAlmostEqual(lhs, rhs)

def create_merged_weight_file(self, merged_weight_filename, strict=False):
    """
    Merge weight file chunks to a single, global weight file.

    :param str merged_weight_filename: Path to the merged weight file.
    :param bool strict: If ``False``, allow "missing" files where the iterator index cannot create a found file.
     It is best to leave this ``False`` as not all sources and destinations are mapped. If ``True``, raise an
     ``IOError`` for missing files.
    """

    if vm.size > 1:
        raise ValueError("'create_merged_weight_file' does not work in parallel")

    index_filename = self.create_full_path_from_template('index_file')
    ifile = RequestDataset(uri=index_filename).get()
    ifile.load()
    ifc = GridChunkerConstants.IndexFile
    gidx = ifile[ifc.NAME_INDEX_VARIABLE].attrs

    src_global_shape = gidx[ifc.NAME_SRC_GRID_SHAPE]
    dst_global_shape = gidx[ifc.NAME_DST_GRID_SHAPE]

    # Get the global weight dimension size.
    n_s_size = 0
    weight_filename = ifile[gidx[ifc.NAME_WEIGHTS_VARIABLE]]
    wv = weight_filename.join_string_value()
    split_weight_file_directory = self.paths['wd']
    for wfn in map(lambda x: os.path.join(split_weight_file_directory, os.path.split(x)[1]), wv):
        if not os.path.exists(wfn):
            if strict:
                raise IOError(wfn)
            else:
                continue
        n_s_size += RequestDataset(wfn).get().dimensions['n_s'].size

    # Create output weight file.
    wf_varnames = ['row', 'col', 'S']
    wf_dtypes = [np.int32, np.int32, np.float64]
    vc = VariableCollection()
    dim = Dimension('n_s', n_s_size)
    for w, wd in zip(wf_varnames, wf_dtypes):
        var = Variable(name=w, dimensions=dim, dtype=wd)
        vc.add_variable(var)
    vc.write(merged_weight_filename)

    # Transfer weights to the merged file.
    sidx = 0
    src_indices = self.src_grid._gc_create_global_indices_(src_global_shape)
    dst_indices = self.dst_grid._gc_create_global_indices_(dst_global_shape)
    out_wds = nc.Dataset(merged_weight_filename, 'a')
    for ii, wfn in enumerate(map(lambda x: os.path.join(split_weight_file_directory, x), wv)):
        if not os.path.exists(wfn):
            if strict:
                raise IOError(wfn)
            else:
                continue
        wdata = RequestDataset(wfn).get()
        for wvn in wf_varnames:
            odata = wdata[wvn].get_value()
            try:
                split_grids_directory = self.paths['wd']
                odata = self._gc_remap_weight_variable_(ii, wvn, odata, src_indices, dst_indices, ifile, gidx,
                                                        split_grids_directory=split_grids_directory)
            except IndexError as e:
                msg = "Weight filename: '{}'; Weight Variable Name: '{}'. {}".format(wfn, wvn, str(e))
                raise IndexError(msg)
            out_wds[wvn][sidx:sidx + odata.size] = odata
            out_wds.sync()
        sidx += odata.size
    out_wds.close()

def insert_weighted(index_path, dst_wd, dst_master_path, data_variables='auto'):
    """
    Insert weighted destination variable data into the master destination file.

    :param str index_path: Path to the split index netCDF file.
    :param str dst_wd: Working directory containing the destination files holding the weighted data.
    :param str dst_master_path: Path to the destination master weight file.
    :param list data_variables: Optional list of data variables. Otherwise, auto-discovery is used.
    """

    if vm.size > 1:
        raise NotImplementedError('serial only')

    index_field = RequestDataset(index_path).get()
    gs_index_v = index_field[GridChunkerConstants.IndexFile.NAME_INDEX_VARIABLE]
    dst_filenames = gs_index_v.attrs[GridChunkerConstants.IndexFile.NAME_DESTINATION_VARIABLE]
    dst_filenames = index_field[dst_filenames]

    y_bounds = GridChunkerConstants.IndexFile.NAME_Y_DST_BOUNDS_VARIABLE
    y_bounds = gs_index_v.attrs[y_bounds]
    y_bounds = index_field[y_bounds].get_value()

    x_bounds = GridChunkerConstants.IndexFile.NAME_X_DST_BOUNDS_VARIABLE
    x_bounds = gs_index_v.attrs[x_bounds]
    x_bounds = index_field[x_bounds].get_value()

    joined = dst_filenames.join_string_value()
    if data_variables == 'auto':
        v = None
    else:
        v = data_variables
    dst_master_field = RequestDataset(dst_master_path, variable=v).get()
    for data_variable in dst_master_field.data_variables:
        assert not data_variable.has_allocated_value
        if data_variable.ndim == 3:
            for time_index in range(dst_master_field.time.shape[0]):
                for vidx, source_path in enumerate(joined):
                    source_path = os.path.join(dst_wd, source_path)
                    slc = {dst_master_field.time.dimensions[0].name: time_index,
                           dst_master_field.y.dimensions[0].name: slice(None),
                           dst_master_field.x.dimensions[0].name: slice(None)}
                    source_field = RequestDataset(source_path).create_field()
                    try:
                        source_data = source_field[data_variable.name][slc]
                    except KeyError:
                        if data_variable.name not in source_field.keys():
                            msg = "The destination variable '{}' is not in the destination file '{}'. " \
                                  "Was SMM applied?".format(data_variable.name, source_path)
                            raise KeyError(msg)
                        else:
                            raise
                    assert not source_data.has_allocated_value
                    with nc.Dataset(dst_master_path, 'a') as ds:
                        ds.variables[data_variable.name][time_index,
                                                         y_bounds[vidx][0]:y_bounds[vidx][1],
                                                         x_bounds[vidx][0]:x_bounds[vidx][1]] = source_data.get_value()
        elif data_variable.ndim == 2:
            for vidx, source_path in enumerate(joined):
                source_path = os.path.join(dst_wd, source_path)
                source_data = RequestDataset(source_path).get()[data_variable.name]
                assert not source_data.has_allocated_value
                with nc.Dataset(dst_master_path, 'a') as ds:
                    ds.variables[data_variable.name][y_bounds[vidx][0]:y_bounds[vidx][1],
                                                     x_bounds[vidx][0]:x_bounds[vidx][1]] = source_data.get_value()
        else:
            raise NotImplementedError(data_variable.ndim)

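# A hedged usage sketch for insert_weighted() above. The paths are hypothetical;
# the index file and per-chunk destination files are assumed to come from a prior
# GridChunker run plus a sparse matrix multiply (SMM) that filled the chunk files
# with weighted data.
insert_weighted('/tmp/chunks/index.nc',  # split index netCDF written by the chunker (hypothetical name)
                '/tmp/chunks',           # directory of weighted destination chunk files
                '/tmp/dst_master.nc')    # master destination file updated in place
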
def test_system_converting_state_boundaries_shapefile(self):
    ocgis.env.USE_NETCDF4_MPI = False  # tdk:FIX: this hangs in the STATE_FIPS write for asynch; might be an nc4 bug

    keywords = {'transform_to_crs': [None, Spherical],
                'use_geometry_iterator': [False, True]}
    actual_xsums = []
    actual_ysums = []
    for k in self.iter_product_keywords(keywords):
        if k.use_geometry_iterator and k.transform_to_crs is not None:
            to_crs = k.transform_to_crs()
        else:
            to_crs = None
        if k.transform_to_crs is None:
            desired_crs = WGS84()
        else:
            desired_crs = k.transform_to_crs()

        rd = RequestDataset(uri=self.path_state_boundaries, variable=['UGID', 'ID'])
        rd.metadata['schema']['geometry'] = 'MultiPolygon'
        field = rd.get()
        self.assertEqual(len(field.data_variables), 2)

        # Test there is no mask present.
        field.geom.load()
        self.assertFalse(field.geom.has_mask)
        self.assertNotIn(VariableName.SPATIAL_MASK, field)
        self.assertIsNone(field.dimension_map.get_spatial_mask())

        self.assertEqual(field.crs, WGS84())
        if k.transform_to_crs is not None:
            field.update_crs(desired_crs)
        self.assertEqual(len(field.data_variables), 2)
        self.assertEqual(len(field.geom.parent.data_variables), 2)

        try:
            gc = field.geom.convert_to(pack=False, use_geometry_iterator=k.use_geometry_iterator, to_crs=to_crs)
        except ValueError as e:
            try:
                self.assertFalse(k.use_geometry_iterator)
                self.assertIsNotNone(to_crs)
            except AssertionError:
                raise e
            else:
                continue

        actual_xsums.append(gc.x.get_value().sum())
        actual_ysums.append(gc.y.get_value().sum())
        self.assertEqual(gc.crs, desired_crs)

        # Test there is no mask present after conversion to geometry coordinates.
        self.assertFalse(gc.has_mask)
        self.assertNotIn(VariableName.SPATIAL_MASK, gc.parent)
        self.assertIsNone(gc.dimension_map.get_spatial_mask())

        path = self.get_temporary_file_path('esmf_state_boundaries.nc')
        self.assertEqual(gc.parent.crs, desired_crs)
        gc.parent.write(path, driver=DriverKey.NETCDF_ESMF_UNSTRUCT)

        gathered_geoms = vm.gather(field.geom.get_value())
        if vm.rank == 0:
            actual_geoms = []
            for g in gathered_geoms:
                actual_geoms.extend(g)

            rd = RequestDataset(path, driver=DriverKey.NETCDF_ESMF_UNSTRUCT)
            infield = rd.get()
            self.assertEqual(create_crs(infield.crs.value), desired_crs)

            for dv in field.data_variables:
                self.assertIn(dv.name, infield)

            ingrid = infield.grid
            self.assertIsInstance(ingrid, GridUnstruct)

            for g in ingrid.archetype.iter_geometries():
                self.assertPolygonSimilar(g[1], actual_geoms[g[0]], check_type=False)

        vm.barrier()

        # Test coordinates have actually changed.
        if not k.use_geometry_iterator:
            for ctr, to_test in enumerate([actual_xsums, actual_ysums]):
                for lhs, rhs in itertools.combinations(to_test, 2):
                    if ctr == 0:
                        self.assertAlmostEqual(lhs, rhs)
                    else:
                        self.assertNotAlmostEqual(lhs, rhs)

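# A minimal sketch of the conversion pattern the two tests above exercise: read
# polygon geometries, convert them to geometry coordinates, and write an ESMF
# unstructured file. 'states.shp' and 'esmf_unstruct.nc' are hypothetical paths;
# convert_to and DriverKey.NETCDF_ESMF_UNSTRUCT are the same APIs used above.
rd = RequestDataset(uri='states.shp')
field = rd.get()
gc = field.geom.convert_to(pack=False, use_geometry_iterator=True)  # stream geometries instead of loading all
gc.parent.write('esmf_unstruct.nc', driver=DriverKey.NETCDF_ESMF_UNSTRUCT)
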
def create_merged_weight_file(self, merged_weight_filename, strict=False):
    """
    Merge weight file chunks to a single, global weight file.

    :param str merged_weight_filename: Path to the merged weight file.
    :param bool strict: If ``False``, allow "missing" files where the iterator index cannot create a found file.
     It is best to leave this ``False`` as not all sources and destinations are mapped. If ``True``, raise an
     ``IOError`` for missing files.
    """

    if vm.size > 1:
        raise ValueError("'create_merged_weight_file' does not work in parallel")

    index_filename = self.create_full_path_from_template('index_file')
    ifile = RequestDataset(uri=index_filename).get()
    ifile.load()
    ifc = GridChunkerConstants.IndexFile
    gidx = ifile[ifc.NAME_INDEX_VARIABLE].attrs

    src_global_shape = gidx[ifc.NAME_SRC_GRID_SHAPE]
    dst_global_shape = gidx[ifc.NAME_DST_GRID_SHAPE]

    # Get the global weight dimension size.
    n_s_size = 0
    weight_filename = ifile[gidx[ifc.NAME_WEIGHTS_VARIABLE]]
    wv = weight_filename.join_string_value()
    split_weight_file_directory = self.paths['wd']
    for wfn in map(lambda x: os.path.join(split_weight_file_directory, os.path.split(x)[1]), wv):
        ocgis_lh(msg="current merge weight file target: {}".format(wfn), level=logging.DEBUG,
                 logger=_LOCAL_LOGGER)
        if not os.path.exists(wfn):
            if strict:
                raise IOError(wfn)
            else:
                continue
        curr_dimsize = RequestDataset(wfn).get().dimensions['n_s'].size
        # ESMF writes the weight file, but it may be empty if there are no generated weights.
        if curr_dimsize is not None:
            n_s_size += curr_dimsize

    # Create output weight file.
    wf_varnames = ['row', 'col', 'S']
    wf_dtypes = [np.int32, np.int32, np.float64]
    vc = VariableCollection()
    dim = Dimension('n_s', n_s_size)
    for w, wd in zip(wf_varnames, wf_dtypes):
        var = Variable(name=w, dimensions=dim, dtype=wd)
        vc.add_variable(var)
    vc.write(merged_weight_filename)

    # Transfer weights to the merged file.
    sidx = 0
    src_indices = self.src_grid._gc_create_global_indices_(src_global_shape)
    dst_indices = self.dst_grid._gc_create_global_indices_(dst_global_shape)
    out_wds = nc.Dataset(merged_weight_filename, 'a')
    for ii, wfn in enumerate(map(lambda x: os.path.join(split_weight_file_directory, x), wv)):
        if not os.path.exists(wfn):
            if strict:
                raise IOError(wfn)
            else:
                continue
        wdata = RequestDataset(wfn).get()
        for wvn in wf_varnames:
            odata = wdata[wvn].get_value()
            try:
                split_grids_directory = self.paths['wd']
                odata = self._gc_remap_weight_variable_(ii, wvn, odata, src_indices, dst_indices, ifile, gidx,
                                                        split_grids_directory=split_grids_directory)
            except IndexError as e:
                msg = "Weight filename: '{}'; Weight Variable Name: '{}'. {}".format(wfn, wvn, str(e))
                raise IndexError(msg)
            out_wds[wvn][sidx:sidx + odata.size] = odata
            out_wds.sync()
        sidx += odata.size
    out_wds.close()

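# A hedged usage sketch for the merge step above. The chunk directory and output
# path are hypothetical, and the GridChunker arguments are a subset of those used
# in test_system_grid_chunking; additional arguments (e.g. grid resolutions) may
# be needed depending on the grids. Merging must run on a single rank.
def merge_chunked_weights(rd_src, rd_dst, chunk_wd, merged_path):
    from ocgis.spatial.grid_chunker import GridChunker

    gc = GridChunker(rd_src, rd_dst, nchunks_dst=[8], paths={'wd': chunk_wd}, genweights=True)
    gc.write_chunks()  # writes split grids, the index file, and per-chunk weights
    gc.create_merged_weight_file(merged_path, strict=False)
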