def test_system_spatial_subsetting(self):
    """Test spatial subsetting ESMF Unstructured format."""
    # Subset geometry: a bounding box in -180/180 space converted to 0/360
    # coordinates via unwrap() to match the unstructured grid.
    bbox = shapely.geometry.box(*[-119.2, 61.7, -113.2, 62.7])
    gvar = GeometryVariable(name='geom', value=bbox, is_bbox=True, dimensions='ngeom', crs=Spherical())
    gvar.unwrap()
    rd = RequestDataset(uri=self.path_esmf_unstruct,
                        driver=DriverESMFUnstruct,
                        crs=Spherical(),
                        grid_abstraction='point',
                        grid_is_isomorphic=True)
    field = rd.create_field()
    # Optimized bbox subsetting avoids the full geometric intersects machinery.
    sub, slc = field.grid.get_intersects(gvar, optimized_bbox_subset=True, return_slice=True)
    desired_extent = np.array((240.890625, 61.8046875, 246.796875, 62.6484375))
    # At least one rank must hold a non-empty piece of the subset.
    self.assertGreaterEqual(len(vm.get_live_ranks_from_object(sub)), 1)
    with vm.scoped_by_emptyable('reduction', sub):
        if not vm.is_null:
            # Remove coordinate values no longer referenced after the subset.
            red = sub.reduce_global()
            self.assertNumpyAllClose(desired_extent, np.array(red.extent_global))
    # NOTE(review): 'collective=True' presumably coordinates path creation
    # across ranks -- confirm against the test base class implementation.
    path = self.get_temporary_file_path('foo.nc', collective=True)
    with vm.scoped_by_emptyable('write', sub):
        if not vm.is_null:
            red.parent.write(path)
def test_get_distributed_slice(self):
    """Test slicing a distributed dimension for both distributed and non-distributed cases."""
    # NOTE(review): an identical definition of this test appears twice in this
    # file; the later definition silently shadows the earlier one.
    self.add_barrier = False
    for d in [True, False]:
        dist = OcgDist()
        dim = dist.create_dimension('five', 5, dist=d, src_idx='auto')
        dist.update_dimension_bounds()
        if not dim.is_empty:
            self.assertEqual(dim.bounds_global, (0, 5))
        if dim.dist:
            # Five elements fit on at most two ranks here, so higher ranks
            # receive empty dimensions.
            if MPI_RANK > 1:
                self.assertTrue(dim.is_empty)
            else:
                self.assertFalse(dim.is_empty)
        with vm.scoped_by_emptyable('dim slice', dim):
            if not vm.is_null:
                sub = dim.get_distributed_slice(slice(1, 3))
            else:
                sub = None
        if dim.dist:
            if not dim.is_empty:
                self.assertIsNotNone(dim._src_idx)
            else:
                self.assertIsNone(sub)
            if MPI_SIZE == 2:
                desired_emptiness = {0: False, 1: True}[MPI_RANK]
                desired_bounds_local = {0: (0, 2), 1: (0, 0)}[MPI_RANK]
                self.assertEqual(sub.is_empty, desired_emptiness)
                self.assertEqual(sub.bounds_local, desired_bounds_local)
            # NOTE(review): '0 < MPI_RANK > 2' is a chained comparison equivalent
            # to 'MPI_RANK > 2' since ranks are non-negative.
            if MPI_SIZE >= 5 and 0 < MPI_RANK > 2:
                self.assertTrue(sub is None or sub.is_empty)
        else:
            # Non-distributed dimensions are unaffected by rank count.
            self.assertEqual(len(dim), 5)
            self.assertEqual(dim.bounds_global, (0, 5))
            self.assertEqual(dim.bounds_local, (0, 5))

    # Test a case where some participating ranks are empty after the slice.
    dist = OcgDist()
    dim = dist.create_dimension('five', 5, dist=True, src_idx='auto')
    dist.update_dimension_bounds()
    with vm.scoped_by_emptyable('five slice', dim):
        if not vm.is_null:
            sub2 = dim.get_distributed_slice(slice(2, 4))
        else:
            sub2 = None
    if MPI_SIZE == 3 and MPI_RANK == 2:
        self.assertIsNone(sub2)
def test_get_distributed_slice(self):
    """Test slicing a distributed dimension for both distributed and non-distributed cases."""
    # NOTE(review): this definition duplicates an earlier, identical definition
    # in this file; only this later one is effective at runtime.
    self.add_barrier = False
    for d in [True, False]:
        dist = OcgDist()
        dim = dist.create_dimension('five', 5, dist=d, src_idx='auto')
        dist.update_dimension_bounds()
        if not dim.is_empty:
            self.assertEqual(dim.bounds_global, (0, 5))
        if dim.dist:
            # Five elements fit on at most two ranks here, so higher ranks
            # receive empty dimensions.
            if MPI_RANK > 1:
                self.assertTrue(dim.is_empty)
            else:
                self.assertFalse(dim.is_empty)
        with vm.scoped_by_emptyable('dim slice', dim):
            if not vm.is_null:
                sub = dim.get_distributed_slice(slice(1, 3))
            else:
                sub = None
        if dim.dist:
            if not dim.is_empty:
                self.assertIsNotNone(dim._src_idx)
            else:
                self.assertIsNone(sub)
            if MPI_SIZE == 2:
                desired_emptiness = {0: False, 1: True}[MPI_RANK]
                desired_bounds_local = {0: (0, 2), 1: (0, 0)}[MPI_RANK]
                self.assertEqual(sub.is_empty, desired_emptiness)
                self.assertEqual(sub.bounds_local, desired_bounds_local)
            # NOTE(review): '0 < MPI_RANK > 2' is a chained comparison equivalent
            # to 'MPI_RANK > 2' since ranks are non-negative.
            if MPI_SIZE >= 5 and 0 < MPI_RANK > 2:
                self.assertTrue(sub is None or sub.is_empty)
        else:
            # Non-distributed dimensions are unaffected by rank count.
            self.assertEqual(len(dim), 5)
            self.assertEqual(dim.bounds_global, (0, 5))
            self.assertEqual(dim.bounds_local, (0, 5))

    # Test a case where some participating ranks are empty after the slice.
    dist = OcgDist()
    dim = dist.create_dimension('five', 5, dist=True, src_idx='auto')
    dist.update_dimension_bounds()
    with vm.scoped_by_emptyable('five slice', dim):
        if not vm.is_null:
            sub2 = dim.get_distributed_slice(slice(2, 4))
        else:
            sub2 = None
    if MPI_SIZE == 3 and MPI_RANK == 2:
        self.assertIsNone(sub2)
def test_get_unioned_spatial_average_parallel(self):
    """Test a geometry union with spatial averaging across eight ranks."""
    if MPI_SIZE != 8:
        raise SkipTest('MPI_SIZE != 8')
    dist = OcgDist()
    geom_count = dist.create_dimension('geom_count', size=8, dist=True)
    time_count = dist.create_dimension('time', size=3)
    dist.update_dimension_bounds()
    if not geom_count.is_empty:
        # Each rank contributes buffered points whose buffer radius depends on rank,
        # giving distinct per-rank areas for the weighted average.
        gvar = GeometryVariable(value=[Point(1.0, 1.0).buffer(MPI_RANK + 2)] * len(geom_count),
                                dimensions=geom_count)
        value = np.zeros((len(time_count), len(geom_count)), dtype=float)
        for ii in range(value.shape[0]):
            # Data varies with both rank and time index.
            value[ii] = [MPI_RANK + 1 + ii + 1] * len(geom_count)
        data = Variable(name='data', value=value, dtype=float, dimensions=[time_count, geom_count])
    else:
        # Empty ranks still construct placeholder variables so collective
        # operations see a consistent structure.
        gvar = GeometryVariable(dimensions=geom_count)
        data = Variable(name='data', dimensions=[time_count, geom_count], dtype=float)
    gvar.parent.add_variable(data)
    # Emptiness must be consistent across the geometry, data, and parent.
    self.assertTrue(gvar.is_empty == data.is_empty == gvar.parent.is_empty)
    with vm.scoped_by_emptyable('union', gvar):
        if vm.is_null:
            unioned = None
        else:
            # Union the geometries while spatially averaging the 'data' variable.
            unioned = gvar.get_unioned(spatial_average='data')
    if unioned is not None:
        self.assertIsInstance(unioned, GeometryVariable)
        actual = unioned.parent[data.name]
        self.assertAlmostEqual(actual.get_value().max(), 5.5466666666666677)
    else:
        self.assertIsNone(unioned)
def test_get_distributed_slice_simple(self):
    """Exercise distributed slicing on a small dimension, then on a non-distributed one."""
    dist = OcgDist()
    the_dim = dist.create_dimension('five', 5, dist=True, src_idx='auto')
    dist.update_dimension_bounds(min_elements=1)

    with vm.scoped_by_emptyable('simple slice test', the_dim):
        sliced = the_dim.get_distributed_slice(slice(2, 4)) if not vm.is_null else None

    if sliced is None or sliced.is_empty:
        # Ranks excluded from the slice either hold no dimension at all or a
        # zero-length local piece.
        if the_dim.is_empty:
            self.assertIsNone(sliced)
        else:
            self.assertEqual(sliced.bounds_global, (0, 0))
            self.assertEqual(sliced.bounds_local, (0, 0))
    else:
        self.assertEqual(sliced.bounds_global, (0, 2))

    # Global bounds should also be updated when slicing a non-distributed dimension.
    dist = OcgDist()
    the_dim = dist.create_dimension('tester', 768, dist=False)
    dist.update_dimension_bounds()
    sliced = the_dim.get_distributed_slice(slice(73, 157))
    self.assertEqual(sliced.size, 84)
    self.assertEqual(sliced.bounds_global, (0, 84))
    self.assertEqual(sliced.bounds_local, (0, 84))
def main():
    """Subset a UGRID field by a bounding box, then globally reduce the result.

    Collective across the VM: the subset, reduction, and final barrier must be
    executed by every rank.
    """
    rd = RequestDataset(IN_PATH, driver=DriverNetcdfUGRID, grid_abstraction=GridAbstraction.POINT)
    field = rd.get()
    # Fixed: Python 2 print statement ('print field.shapes') replaced with a
    # print() call so this runs under Python 3.
    print(field.shapes)
    sub = field.grid.get_intersects(box(*BBOX), optimized_bbox_subset=True).parent
    with vm.scoped_by_emptyable('reduce global', sub):
        if not vm.is_null:
            # Switch to polygon abstraction before the global reduction.
            sub.grid_abstraction = GridAbstraction.POLYGON
            rank_print('sub', sub.grid.cindex.get_value())
            # Remove coordinate values no longer referenced by the coordinate index.
            subr = sub.grid.reduce_global().parent
            rank_print('sub', subr.grid.cindex.get_value())
            rank_print(subr.grid.extent)
    vm.barrier()
def test_write_variable_collection_object_arrays(self):
    """Test writing variable length arrays in parallel."""
    # Create the reference (serial) file on rank 0 only.
    with vm.scoped('write', [0]):
        if not vm.is_null:
            path_actual = self.get_temporary_file_path('in.nc')
            path_desired = self.get_temporary_file_path('out.nc')
            # Ragged rows exercise the variable-length object dtype.
            value = [[1, 3, 5], [7, 9], [11]]
            v = Variable(name='objects', value=value, fill_value=4, dtype=ObjectType(int),
                         dimensions='values')
            v.write(path_desired)
        else:
            v, path_actual, path_desired = [None] * 3
    path_actual = MPI_COMM.bcast(path_actual)
    path_desired = MPI_COMM.bcast(path_desired)
    dest_mpi = OcgDist()
    dest_mpi.create_dimension('values', 3, dist=True)
    dest_mpi.update_dimension_bounds()
    # Scatter the ragged variable across ranks and write it collectively.
    scattered = variable_scatter(v, dest_mpi)
    outvc = VariableCollection(variables=[scattered])
    with vm.scoped_by_emptyable('write', outvc):
        if not vm.is_null:
            outvc.write(path_actual)
    if MPI_RANK == 0:
        # Parallel write must reproduce the serial reference file.
        self.assertNcEqual(path_actual, path_desired)
def test_variable_gather_bounded_source_index(self):
    """Gathering a scattered variable should collapse the source index to its bounds."""
    self.add_barrier = False

    dist = OcgDist()
    dist.create_dimension('long', 7, dist=True, src_idx='auto')
    dist.create_dimension('short', 3, src_idx='auto')
    dist.update_dimension_bounds()

    # Only the root rank constructs the source variable; scatter distributes it.
    if vm.rank == 0:
        to_scatter = Variable(name='test', value=np.arange(21).reshape(7, 3),
                              dimensions=['long', 'short'])
    else:
        to_scatter = None
    svar = variable_scatter(to_scatter, dist)

    with vm.scoped_by_emptyable('gather test', svar):
        if vm.is_null:
            return
        gathered = variable_gather(svar)
        if vm.rank == 0:
            # The distributed dimension's source index becomes a bounds tuple.
            self.assertEqual(gathered.dimensions[0]._src_idx, (0, 7))
        else:
            self.assertIsNone(gathered)
def main():
    """Subset a UGRID field by a bounding box, then globally reduce the result.

    Collective across the VM: the subset, reduction, and final barrier must be
    executed by every rank.
    """
    rd = RequestDataset(IN_PATH, driver=DriverNetcdfUGRID, grid_abstraction=GridAbstraction.POINT)
    field = rd.get()
    # Fixed: Python 2 print statement ('print field.shapes') replaced with a
    # print() call so this runs under Python 3.
    print(field.shapes)
    sub = field.grid.get_intersects(box(*BBOX), optimized_bbox_subset=True).parent
    with vm.scoped_by_emptyable('reduce global', sub):
        if not vm.is_null:
            # Switch to polygon abstraction before the global reduction.
            sub.grid_abstraction = GridAbstraction.POLYGON
            rank_print('sub', sub.grid.cindex.get_value())
            # Remove coordinate values no longer referenced by the coordinate index.
            subr = sub.grid.reduce_global().parent
            rank_print('sub', subr.grid.cindex.get_value())
            rank_print(subr.grid.extent)
    vm.barrier()
def test_write_variable_collection_object_arrays(self):
    """Test writing variable length arrays in parallel."""
    # NOTE(review): this definition duplicates an earlier, identical definition
    # in this file; only this later one is effective at runtime.
    # Create the reference (serial) file on rank 0 only.
    with vm.scoped('write', [0]):
        if not vm.is_null:
            path_actual = self.get_temporary_file_path('in.nc')
            path_desired = self.get_temporary_file_path('out.nc')
            # Ragged rows exercise the variable-length object dtype.
            value = [[1, 3, 5], [7, 9], [11]]
            v = Variable(name='objects', value=value, fill_value=4, dtype=ObjectType(int),
                         dimensions='values')
            v.write(path_desired)
        else:
            v, path_actual, path_desired = [None] * 3
    path_actual = MPI_COMM.bcast(path_actual)
    path_desired = MPI_COMM.bcast(path_desired)
    dest_mpi = OcgDist()
    dest_mpi.create_dimension('values', 3, dist=True)
    dest_mpi.update_dimension_bounds()
    # Scatter the ragged variable across ranks and write it collectively.
    scattered = variable_scatter(v, dest_mpi)
    outvc = VariableCollection(variables=[scattered])
    with vm.scoped_by_emptyable('write', outvc):
        if not vm.is_null:
            outvc.write(path_actual)
    if MPI_RANK == 0:
        # Parallel write must reproduce the serial reference file.
        self.assertNcEqual(path_actual, path_desired)
def test_create_unique_global_array(self):
    """Globally unique values must match the serial unique of the full array."""
    dist = OcgDist()
    dist.create_dimension('dim', 9, dist=True)
    dist.update_dimension_bounds()

    test_values = [[4, 2, 1, 2, 1, 4, 1, 4, 2],
                   [44, 25, 16, 27, 18, 49, 10, 41, 22],
                   [44, 25, 16, 27, 44, 49, 10, 41, 44],
                   [1, 1, 1, 1, 1, 1, 1, 1, 1]]
    for current in test_values:
        # Root computes the serial reference before scattering.
        if vm.rank == 0:
            cindex = Variable(name='cindex', value=current, dimensions='dim')
            expected = np.unique(cindex.get_value())
            expected_length = len(expected)
        else:
            cindex = None
        cindex = variable_scatter(cindex, dist)
        with vm.scoped_by_emptyable('not empty', cindex):
            if not vm.is_null:
                unique_local = create_unique_global_array(cindex.get_value())
                gathered = vm.gather(unique_local)
                if vm.rank == 0:
                    # The concatenated per-rank uniques form the global unique set.
                    gathered = hgather(gathered)
                    self.assertEqual(len(gathered), expected_length)
                    self.assertEqual(set(gathered), set(expected))
def test_create_unique_global_array(self):
    """Globally unique values must match the serial unique of the full array.

    NOTE(review): this definition duplicates an earlier, identical definition
    in this file; only this later one is effective at runtime.
    """
    dist = OcgDist()
    dist.create_dimension('dim', 9, dist=True)
    dist.update_dimension_bounds()

    for current in ([4, 2, 1, 2, 1, 4, 1, 4, 2],
                    [44, 25, 16, 27, 18, 49, 10, 41, 22],
                    [44, 25, 16, 27, 44, 49, 10, 41, 44],
                    [1, 1, 1, 1, 1, 1, 1, 1, 1]):
        # Root computes the serial reference before scattering.
        if vm.rank == 0:
            cindex = Variable(name='cindex', value=current, dimensions='dim')
            expected = np.unique(cindex.get_value())
            expected_length = len(expected)
        else:
            cindex = None
        cindex = variable_scatter(cindex, dist)
        with vm.scoped_by_emptyable('not empty', cindex):
            if not vm.is_null:
                unique_local = create_unique_global_array(cindex.get_value())
                gathered = vm.gather(unique_local)
                if vm.rank == 0:
                    # The concatenated per-rank uniques form the global unique set.
                    gathered = hgather(gathered)
                    self.assertEqual(len(gathered), expected_length)
                    self.assertEqual(set(gathered), set(expected))
def test_write_variable_collection(self):
    """Round-trip a netCDF file through a distributed variable collection write."""
    if MPI_RANK == 0:
        path_in = self.get_temporary_file_path('foo.nc')
        path_out = self.get_temporary_file_path('foo_out.nc')
        # Seed the input file with a single attributed variable.
        with self.nc_scope(path_in, 'w') as ds:
            ncvar = ds.createDimension('seven', 7) or ds.createVariable('var_seven', float,
                                                                        dimensions=('seven',))
            ncvar[:] = np.arange(7, dtype=float) + 10
            ncvar.foo = 'bar'
    else:
        path_in = None
        path_out = None
    path_in = MPI_COMM.bcast(path_in)
    path_out = MPI_COMM.bcast(path_out)

    rd = RequestDataset(path_in)
    # Force the 'seven' dimension to be distributed across ranks.
    rd.metadata['dimensions']['seven']['dist'] = True
    driver = DriverNetcdf(rd)
    vc = driver.get_variable_collection()

    with vm.scoped_by_emptyable('write', vc):
        if not vm.is_null:
            vc.write(path_out)

    if MPI_RANK == 0:
        self.assertNcEqual(path_in, path_out)
def test_get_distributed_slice_simple(self):
    """Exercise distributed slicing on a small dimension, then on a non-distributed one.

    NOTE(review): this definition duplicates an earlier, identical definition
    in this file; only this later one is effective at runtime.
    """
    dist = OcgDist()
    the_dim = dist.create_dimension('five', 5, dist=True, src_idx='auto')
    dist.update_dimension_bounds(min_elements=1)

    with vm.scoped_by_emptyable('simple slice test', the_dim):
        sliced = the_dim.get_distributed_slice(slice(2, 4)) if not vm.is_null else None

    if sliced is not None and not sliced.is_empty:
        self.assertEqual(sliced.bounds_global, (0, 2))
    elif the_dim.is_empty:
        # Ranks with no dimension data never entered the scoped block.
        self.assertIsNone(sliced)
    else:
        # Ranks outside the slice hold a zero-length local piece.
        self.assertEqual(sliced.bounds_global, (0, 0))
        self.assertEqual(sliced.bounds_local, (0, 0))

    # Global bounds should also be updated when slicing a non-distributed dimension.
    dist = OcgDist()
    the_dim = dist.create_dimension('tester', 768, dist=False)
    dist.update_dimension_bounds()
    sliced = the_dim.get_distributed_slice(slice(73, 157))
    self.assertEqual(sliced.size, 84)
    self.assertEqual(sliced.bounds_global, (0, 84))
    self.assertEqual(sliced.bounds_local, (0, 84))
def test_system_parallel_write_ndvariable(self):
    """Test a parallel CSV write with a n-dimensional variable."""
    ompi = OcgDist()
    ompi.create_dimension('time', 3)
    ompi.create_dimension('extra', 2)
    ompi.create_dimension('x', 4)
    ompi.create_dimension('y', 7, dist=True)
    ompi.update_dimension_bounds()
    if MPI_RANK == 0:
        path = self.get_temporary_file_path('foo.csv')
        t = TemporalVariable(name='time', value=[1, 2, 3], dtype=float, dimensions='time')
        t.set_extrapolated_bounds('the_time_bounds', 'bounds')
        extra = Variable(name='extra', value=[7, 8], dimensions='extra')
        x = Variable(name='x', value=[9, 10, 11, 12], dimensions='x', dtype=float)
        x.set_extrapolated_bounds('x_bounds', 'bounds')
        # This will have the distributed dimension.
        y = Variable(name='y', value=[13, 14, 15, 16, 17, 18, 19], dimensions='y', dtype=float)
        y.set_extrapolated_bounds('y_bounds', 'bounds')
        data = Variable(name='data', value=np.random.rand(3, 2, 7, 4),
                        dimensions=['time', 'extra', 'y', 'x'])
        vc = VariableCollection(variables=[t, extra, x, y, data])
    else:
        path, vc = [None] * 2
    path = MPI_COMM.bcast(path)
    # Distribute the collection along the 'y' dimension.
    vc = variable_collection_scatter(vc, ompi)
    with vm.scoped_by_emptyable('write', vc):
        if not vm.is_null:
            # 'data' drives row iteration; the coordinate variables follow it.
            vc.write(path, iter_kwargs={'variable': 'data',
                                        'followers': ['time', 'extra', 'y', 'x']},
                     driver=DriverCSV)
    if MPI_RANK == 0:
        # 3 * 2 * 7 * 4 = 168 data rows plus one header row.
        desired = 169
        with open(path, 'r') as f:
            lines = f.readlines()
        self.assertEqual(len(lines), desired)
def test_redistribute_by_src_idx(self):
    """Test redistributing a variable via distributed source indices across four ranks."""
    if vm.size != 4:
        raise SkipTest('vm.size != 4')
    dist = OcgDist()
    dim1 = dist.create_dimension('dim1', 5 * vm.size, dist=True)
    dim2 = dist.create_dimension('dim2', 2, dist=False)
    dist.update_dimension_bounds()
    # Each rank holds a distinct, identifiable block of values.
    rank_value = np.arange(5) + (10 * (vm.rank + 1))
    var1 = Variable(name='dvar1', value=rank_value, dimensions=dim1)
    var2 = Variable(name='dvar2', dimensions=[dim1, dim2])
    var1.parent.add_variable(var2)
    path = self.get_temporary_file_path('out.nc')
    var1.parent.write(path)
    # Global indices to select; spread unevenly across ranks below so some
    # ranks end up empty after redistribution.
    desired_idx = np.array([1, 7, 9, 10, 14])
    vdesired_value = variable_gather(var1)
    if vm.rank == 0:
        desired_value = vdesired_value.get_value()[desired_idx]
    # Rank 3 has no entry and therefore constructs an empty selection.
    desired_idx_ranks = {0: slice(1, 2),
                         1: [2, 4],
                         2: [0, 4]}
    rd = RequestDataset(path)
    rd.metadata['dimensions'][dim1.name]['dist'] = True
    field = rd.create_field()
    indvar = field[var1.name]
    # Load the second variable so it participates in the redistribution.
    field[var2.name].load()
    try:
        rank_slice = desired_idx_ranks[vm.rank]
    except KeyError:
        sub = Variable(is_empty=True)
    else:
        sub = indvar[rank_slice]
    self.barrier_print(sub.is_empty)
    redistribute_by_src_idx(indvar, dim1.name, sub.dimensions_dict.get(dim1.name))
    with vm.scoped_by_emptyable('gather for test', indvar):
        if vm.is_null:
            self.assertIn(vm.rank_global, [2, 3])
        else:
            self.assertIn(vm.rank_global, [0, 1])
            for v in [indvar, indvar.parent[var2.name]]:
                # Values must remain unloaded following the redistribution.
                self.assertIsNone(v._value)
                self.assertIsNone(v._mask)
                self.assertIsNone(v._is_empty)
                self.assertFalse(v._has_initialized_value)
            self.rank_print(indvar)
            actual_value = variable_gather(indvar)
            if vm.rank == 0:
                actual_value = actual_value.get_value()
                self.assertNumpyAll(actual_value, desired_value)
def test_get_spatial_subset(self):
    """Test spatial subsetting across a matrix of targets, geometries, and operations."""
    ctr_test = 0
    for ss, k in self:
        for geometry_record in self.get_subset_geometries():
            for operation in ['intersects', 'clip', 'foo']:
                # if ctr_test != 18:
                #     ctr_test += 1
                #     continue
                if MPI_RANK == 0:
                    output_path = self.get_temporary_file_path('file-{}.nc'.format(ctr_test))
                else:
                    output_path = None
                output_path = MPI_COMM.bcast(output_path)
                ctr_test += 1
                # Deep copies prevent subset state from leaking between iterations.
                use_geometry = deepcopy(geometry_record['geom'])
                use_ss = deepcopy(ss)
                try:
                    ret = use_ss.get_spatial_subset(operation, use_geometry, use_spatial_index=True,
                                                    buffer_value=None, buffer_crs=None,
                                                    geom_crs=WGS84())
                except ValueError:
                    # 'foo' is not a valid type of subset operation.
                    if operation == 'foo':
                        continue
                    else:
                        raise
                except EmptySubsetError:
                    # Only specific target/geometry pairings legitimately produce
                    # an empty subset.
                    try:
                        self.assertEqual(list(k.target.keys())[0], 'lambert')
                        self.assertEqual(geometry_record['properties']['DESC'], 'Germany')
                    except AssertionError:
                        self.assertEqual(list(k.target.keys())[0], 'rotated_pole')
                        self.assertEqual(geometry_record['properties']['DESC'], 'Nebraska')
                    continue
                else:
                    self.assertIsInstance(ret, Field)
                    with vm.scoped_by_emptyable('write', ret):
                        if not vm.is_null:
                            ret.write(output_path)
    # Sanity check that the iteration actually exercised multiple combinations.
    self.assertGreater(ctr_test, 5)
def test_get_wrapped_state(self):
    """Test wrapped-state determination on distributed fields."""
    if sys.version_info.major == 3 and sys.version_info.minor == 5:
        raise SkipTest('undefined behavior with Python 3.5')
    ompi = OcgDist()
    ompi.create_dimension('x', 5, dist=True)
    ompi.create_dimension('y', 1)
    ompi.update_dimension_bounds()
    # Coordinate sets paired with the wrapped state each should produce.
    values = [{'value': [-179, -90, 0, 90, 180], 'desired': WrappedState.WRAPPED},
              {'value': [0, 90, 180, 270, 360], 'desired': WrappedState.UNWRAPPED},
              {'value': [1, 2, 3, 4, 5], 'desired': WrappedState.UNKNOWN}]
    kwds = {'values': values, 'crs': [Spherical(), None]}
    for k in self.iter_product_keywords(kwds):
        # Copy the distribution so each iteration starts clean.
        ompi = deepcopy(ompi)
        if MPI_RANK == 0:
            vx = Variable(name='x', value=k.values['value'], dimensions='x')
            vy = Variable(name='y', value=[0], dimensions='y')
        else:
            vx, vy = [None] * 2
        vx = variable_scatter(vx, ompi)
        vy = variable_scatter(vy, ompi)
        grid = Grid(vx, vy)
        field = Field(grid=grid, crs=k.crs)
        with vm.scoped_by_emptyable('wrap', field):
            if not vm.is_null:
                wrapped_state = field.wrapped_state
            else:
                wrapped_state = None
        if not field.is_empty:
            # A wrapped state is only defined when a CRS is present.
            if k.crs is None:
                self.assertIsNone(wrapped_state)
            else:
                self.assertIsNotNone(wrapped_state)
        if k.crs is None or field.is_empty:
            self.assertIsNone(wrapped_state)
        else:
            self.assertEqual(wrapped_state, k.values['desired'])
def test_system_spatial_wrapping_and_reorder(self):
    """Test combinations of spatial wrapping and reordering through operations."""
    if sys.version_info.major == 3 and sys.version_info.minor == 5:
        raise SkipTest('undefined behavior with Python 3.5')
    keywords = {'spatial_wrapping': list(SpatialWrapping.iter_possible()),
                'crs': [None, Spherical(), CoordinateReferenceSystem(epsg=2136)],
                'unwrapped': [True, False],
                'spatial_reorder': [False, True]}
    for ctr, k in enumerate(self.iter_product_keywords(keywords)):
        field = self.get_wrap_field(crs=k.crs, unwrapped=k.unwrapped)
        ops = OcgOperations(dataset=field, spatial_wrapping=k.spatial_wrapping,
                            spatial_reorder=k.spatial_reorder)
        ret = ops.execute()
        actual_field = ret.get_element()
        with vm.scoped_by_emptyable('wrapped state', actual_field):
            if not vm.is_null:
                actual = actual_field.wrapped_state
            else:
                actual = None
        # NOTE(review): the x-coordinate is read before the emptiness check --
        # presumably the grid is always present here; confirm for empty ranks.
        actual_x = actual_field.grid.x.get_value()
        if not actual_field.is_empty:
            self.assertLessEqual(actual_x.max(), 360.)
            if k.spatial_reorder and k.unwrapped and k.spatial_wrapping == 'wrap' and k.crs == Spherical():
                # Wrapping then reordering rotates the x-axis, so data columns
                # are rotated the same way at every time index.
                actual_data_value = actual_field.data_variables[0].get_value()
                desired_reordered = [None] * actual_data_value.shape[1]
                for idx in range(actual_data_value.shape[1]):
                    desired_reordered[idx] = [3.0, 4.0, 0.0, 1.0, 2.0]
                for tidx in range(actual_data_value.shape[0]):
                    time_data_value = actual_data_value[tidx]
                    self.assertEqual(time_data_value.tolist(), desired_reordered)
            if k.spatial_reorder and not k.unwrapped and not k.spatial_wrapping:
                # Reordering alone must leave coordinates ascending.
                self.assertTrue(actual_x[0] < actual_x[-1])
        # Derive the expected wrapped state from the keyword combination.
        if actual is None or k.crs != Spherical():
            desired = None
        else:
            p = k.spatial_wrapping
            if p is None:
                if k.unwrapped:
                    desired = WrappedState.UNWRAPPED
                else:
                    desired = WrappedState.WRAPPED
            elif p == 'wrap':
                desired = WrappedState.WRAPPED
            else:
                desired = WrappedState.UNWRAPPED
        self.assertEqual(actual, desired)
def test_get_wrapped_state(self):
    """Test wrapped-state determination on distributed fields and masked geometries."""
    # NOTE(review): a near-identical definition of this test (without the masked
    # geometry section) appears earlier in this file and is shadowed by this one.
    if sys.version_info.major == 3 and sys.version_info.minor == 5:
        raise SkipTest('undefined behavior with Python 3.5')
    ompi = OcgDist()
    ompi.create_dimension('x', 5, dist=True)
    ompi.create_dimension('y', 1)
    ompi.update_dimension_bounds()
    # Coordinate sets paired with the wrapped state each should produce.
    values = [{'value': [-179, -90, 0, 90, 180], 'desired': WrappedState.WRAPPED},
              {'value': [0, 90, 180, 270, 360], 'desired': WrappedState.UNWRAPPED},
              {'value': [1, 2, 3, 4, 5], 'desired': WrappedState.UNKNOWN}]
    kwds = {'values': values, 'crs': [Spherical(), None]}
    for k in self.iter_product_keywords(kwds):
        # Copy the distribution so each iteration starts clean.
        ompi = deepcopy(ompi)
        if MPI_RANK == 0:
            vx = Variable(name='x', value=k.values['value'], dimensions='x')
            vy = Variable(name='y', value=[0], dimensions='y')
        else:
            vx, vy = [None] * 2
        vx = variable_scatter(vx, ompi)
        vy = variable_scatter(vy, ompi)
        grid = Grid(vx, vy)
        field = Field(grid=grid, crs=k.crs)
        with vm.scoped_by_emptyable('wrap', field):
            if not vm.is_null:
                wrapped_state = field.wrapped_state
            else:
                wrapped_state = None
        if not field.is_empty:
            # A wrapped state is only defined when a CRS is present.
            if k.crs is None:
                self.assertIsNone(wrapped_state)
            else:
                self.assertIsNotNone(wrapped_state)
        if k.crs is None or field.is_empty:
            self.assertIsNone(wrapped_state)
        else:
            self.assertEqual(wrapped_state, k.values['desired'])

    # Test with masked geometries.
    values = [Point(350, 2), Point(-90, 5), Point(340, 5)]
    mask = [True, False, True]
    gvar = GeometryVariable(name='geom', value=values, mask=mask, dimensions='ngeom')
    crs = Spherical()
    wrapped_state = crs.get_wrapped_state(gvar)
    # Only the unmasked point (-90, 5) participates, implying a wrapped state.
    self.assertEqual(wrapped_state, WrappedState.WRAPPED)
def test_system_spatial_averaging_from_file(self):
    """Test spatially averaging gridded data over a polygon read from file."""
    rd_nc = self.test_data.get_rd('cancm4_tas')
    rd_shp = RequestDataset(self.path_state_boundaries)
    field_shp = rd_shp.get()
    actual = field_shp.dimension_map.get_variable(DMK.GEOM)
    self.assertIsNotNone(actual)
    actual = field_shp.dimension_map.get_dimension(DMK.GEOM)
    self.assertEqual(len(actual), 1)
    self.assertEqual(field_shp.crs, WGS84())
    try:
        index_geom = np.where(field_shp['STATE_NAME'].get_value() == 'Nebraska')[0][0]
    except IndexError:
        # Not found on rank.
        polygon_field = None
    else:
        polygon_field = field_shp.get_field_slice({'geom': index_geom})
    # Collect the single rank's polygon on root, then share it with everyone.
    polygon_field = MPI_COMM.gather(polygon_field)
    if MPI_RANK == 0:
        for p in polygon_field:
            if p is not None:
                polygon_field = p
                break
    polygon_field = MPI_COMM.bcast(polygon_field)
    # Convert to the 0-360 longitude system of the gridded data.
    polygon_field.unwrap()
    polygon = polygon_field.geom.get_value()[0]
    field_nc = rd_nc.get()
    sub_field_nc = field_nc.get_field_slice({'time': slice(0, 10)})
    self.assertEqual(sub_field_nc['tas']._dimensions, field_nc['tas']._dimensions)
    sub = sub_field_nc.grid.get_intersects(polygon)
    # When split across two processes, there are floating point summing differences.
    desired = {1: 2734.5195, 2: 2740.4014}
    with vm.scoped_by_emptyable('grid intersects', sub):
        if not vm.is_null:
            abstraction_geometry = sub.get_abstraction_geometry()
            sub.parent.add_variable(abstraction_geometry, force=True)
            unioned = abstraction_geometry.get_unioned(spatial_average='tas')
            if unioned is not None:
                tas = unioned.parent['tas']
                self.assertFalse(tas.is_empty)
                self.assertAlmostEqual(tas.get_value().sum(), desired[vm.size], places=4)
def test_system_spatial_subsetting(self):
    """Test spatial subsetting ESMF Unstructured format."""
    # NOTE(review): this definition duplicates an earlier, identical definition
    # in this file; only this later one is effective at runtime.
    # Subset geometry: a bounding box in -180/180 space converted to 0/360
    # coordinates via unwrap() to match the unstructured grid.
    bbox = shapely.geometry.box(*[-119.2, 61.7, -113.2, 62.7])
    gvar = GeometryVariable(name='geom', value=bbox, is_bbox=True, dimensions='ngeom', crs=Spherical())
    gvar.unwrap()
    rd = RequestDataset(uri=self.path_esmf_unstruct,
                        driver=DriverESMFUnstruct,
                        crs=Spherical(),
                        grid_abstraction='point',
                        grid_is_isomorphic=True)
    field = rd.create_field()
    # Optimized bbox subsetting avoids the full geometric intersects machinery.
    sub, slc = field.grid.get_intersects(gvar, optimized_bbox_subset=True, return_slice=True)
    desired_extent = np.array((240.890625, 61.8046875, 246.796875, 62.6484375))
    # At least one rank must hold a non-empty piece of the subset.
    self.assertGreaterEqual(len(vm.get_live_ranks_from_object(sub)), 1)
    with vm.scoped_by_emptyable('reduction', sub):
        if not vm.is_null:
            # Remove coordinate values no longer referenced after the subset.
            red = sub.reduce_global()
            self.assertNumpyAllClose(desired_extent, np.array(red.extent_global))
    # NOTE(review): 'collective=True' presumably coordinates path creation
    # across ranks -- confirm against the test base class implementation.
    path = self.get_temporary_file_path('foo.nc', collective=True)
    with vm.scoped_by_emptyable('write', sub):
        if not vm.is_null:
            red.parent.write(path)
def test_system_parallel_write_ndvariable(self):
    """Test a parallel vector GIS write with a n-dimensional variable."""
    ompi = OcgDist()
    ompi.create_dimension('time', 3)
    ompi.create_dimension('extra', 2)
    ompi.create_dimension('x', 4)
    ompi.create_dimension('y', 7, dist=True)
    ompi.update_dimension_bounds()
    if MPI_RANK == 0:
        path = self.get_temporary_file_path('foo.shp')
        t = TemporalVariable(name='time', value=[1, 2, 3], dtype=float, dimensions='time')
        t.set_extrapolated_bounds('the_time_bounds', 'bounds')
        extra = Variable(name='extra', value=[7, 8], dimensions='extra')
        x = Variable(name='x', value=[9, 10, 11, 12], dimensions='x', dtype=float)
        x.set_extrapolated_bounds('x_bounds', 'bounds')
        # This will have the distributed dimension.
        y = Variable(name='y', value=[13, 14, 15, 16, 17, 18, 19], dimensions='y', dtype=float)
        y.set_extrapolated_bounds('y_bounds', 'bounds')
        data = Variable(name='data', value=np.random.rand(3, 2, 7, 4),
                        dimensions=['time', 'extra', 'y', 'x'])
        dimension_map = {'x': {'variable': 'x', 'bounds': 'x_bounds'},
                         'y': {'variable': 'y', 'bounds': 'y_bounds'},
                         'time': {'variable': 'time', 'bounds': 'the_time_bounds'}}
        vc = Field(variables=[t, extra, x, y, data], dimension_map=dimension_map, is_data='data')
        # Vector output requires an abstraction geometry on the field.
        vc.set_abstraction_geom()
    else:
        path, vc = [None] * 2
    path = MPI_COMM.bcast(path)
    # Distribute the field along the 'y' dimension.
    vc = variable_collection_scatter(vc, ompi)
    with vm.scoped_by_emptyable('write', vc):
        if not vm.is_null:
            vc.write(path, driver=DriverVector)
    MPI_COMM.Barrier()
    # 3 * 2 * 7 * 4 = 168 records in total across all ranks.
    desired = 168
    rd = RequestDataset(path, driver=DriverVector)
    sizes = MPI_COMM.gather(rd.get().geom.shape[0])
    if MPI_RANK == 0:
        self.assertEqual(sum(sizes), desired)
def test_system_parallel_write_ndvariable(self):
    """Test a parallel vector GIS write with a n-dimensional variable."""
    # NOTE(review): this definition duplicates an earlier, identical definition
    # in this file; only this later one is effective at runtime.
    ompi = OcgDist()
    ompi.create_dimension('time', 3)
    ompi.create_dimension('extra', 2)
    ompi.create_dimension('x', 4)
    ompi.create_dimension('y', 7, dist=True)
    ompi.update_dimension_bounds()
    if MPI_RANK == 0:
        path = self.get_temporary_file_path('foo.shp')
        t = TemporalVariable(name='time', value=[1, 2, 3], dtype=float, dimensions='time')
        t.set_extrapolated_bounds('the_time_bounds', 'bounds')
        extra = Variable(name='extra', value=[7, 8], dimensions='extra')
        x = Variable(name='x', value=[9, 10, 11, 12], dimensions='x', dtype=float)
        x.set_extrapolated_bounds('x_bounds', 'bounds')
        # This will have the distributed dimension.
        y = Variable(name='y', value=[13, 14, 15, 16, 17, 18, 19], dimensions='y', dtype=float)
        y.set_extrapolated_bounds('y_bounds', 'bounds')
        data = Variable(name='data', value=np.random.rand(3, 2, 7, 4),
                        dimensions=['time', 'extra', 'y', 'x'])
        dimension_map = {'x': {'variable': 'x', 'bounds': 'x_bounds'},
                         'y': {'variable': 'y', 'bounds': 'y_bounds'},
                         'time': {'variable': 'time', 'bounds': 'the_time_bounds'}}
        vc = Field(variables=[t, extra, x, y, data], dimension_map=dimension_map, is_data='data')
        # Vector output requires an abstraction geometry on the field.
        vc.set_abstraction_geom()
    else:
        path, vc = [None] * 2
    path = MPI_COMM.bcast(path)
    # Distribute the field along the 'y' dimension.
    vc = variable_collection_scatter(vc, ompi)
    with vm.scoped_by_emptyable('write', vc):
        if not vm.is_null:
            vc.write(path, driver=DriverVector)
    MPI_COMM.Barrier()
    # 3 * 2 * 7 * 4 = 168 records in total across all ranks.
    desired = 168
    rd = RequestDataset(path, driver=DriverVector)
    sizes = MPI_COMM.gather(rd.get().geom.shape[0])
    if MPI_RANK == 0:
        self.assertEqual(sum(sizes), desired)
def create_esmf_grid_fromfile(filename, grid, esmf_kwargs):
    """
    Create an ESMF grid or mesh object from file.

    This call is collective across the VM and must be called by each rank. The underlying call to
    ESMF must be using the global VM.

    :param str filename: Path to the source file.
    :param grid: The ocgis grid whose driver determines the ESMF file format and target class.
    :param dict esmf_kwargs: Keyword arguments destined for ESMF regridding; only 'regrid_method'
     is consulted here.
    :return: Instance of the driver's ESMF grid class.
    """
    from ocgis import vm

    filetype = grid.driver.get_esmf_fileformat()
    klass = grid.driver.get_esmf_grid_class()

    if klass == ESMF.Grid:
        # Corners are only needed for conservative regridding.
        if esmf_kwargs.get('regrid_method') == ESMF.RegridMethod.BILINEAR:
            add_corner_stagger = False
        else:
            add_corner_stagger = True

        # Mask and variable name only supported with GRIDSPEC
        if filetype == ESMF.api.constants.FileFormat.GRIDSPEC:
            # If there is a spatial mask, pass this information to grid creation.
            root = vm.get_live_ranks_from_object(grid)[0]
            with vm.scoped_by_emptyable('masked values', grid):
                if not vm.is_null:
                    if grid.has_masked_values_global:
                        add_mask = True
                        varname = grid.mask_variable.name
                    else:
                        add_mask = False
                        varname = None
                else:
                    varname, add_mask = [None] * 2
            # Share the mask flag/name with ranks outside the scoped communicator.
            varname = vm.bcast(varname, root=root)
            add_mask = vm.bcast(add_mask, root=root)
        else:
            # With ESMF IO, only GRIDSPEC (CF-Grid) files have mask variable control at the API level
            add_mask, varname = None, None

        ret = klass(filename=filename, filetype=filetype, add_corner_stagger=add_corner_stagger,
                    is_sphere=False, add_mask=add_mask, varname=varname)
    else:
        # Unstructured mesh creation requires the mesh host variable's name.
        meshname = str(grid.dimension_map.get_variable(DMK.ATTRIBUTE_HOST))
        ret = klass(filename=filename, filetype=filetype, meshname=meshname)
    return ret
def test_system_spatial_averaging_from_file(self):
    """Test spatially averaging gridded data over a polygon read from file."""
    # NOTE(review): this definition duplicates an earlier, identical definition
    # in this file; only this later one is effective at runtime.
    rd_nc = self.test_data.get_rd('cancm4_tas')
    rd_shp = RequestDataset(self.path_state_boundaries)
    field_shp = rd_shp.get()
    actual = field_shp.dimension_map.get_variable(DMK.GEOM)
    self.assertIsNotNone(actual)
    actual = field_shp.dimension_map.get_dimension(DMK.GEOM)
    self.assertEqual(len(actual), 1)
    self.assertEqual(field_shp.crs, WGS84())
    try:
        index_geom = np.where(field_shp['STATE_NAME'].get_value() == 'Nebraska')[0][0]
    except IndexError:
        # Not found on rank.
        polygon_field = None
    else:
        polygon_field = field_shp.get_field_slice({'geom': index_geom})
    # Collect the single rank's polygon on root, then share it with everyone.
    polygon_field = MPI_COMM.gather(polygon_field)
    if MPI_RANK == 0:
        for p in polygon_field:
            if p is not None:
                polygon_field = p
                break
    polygon_field = MPI_COMM.bcast(polygon_field)
    # Convert to the 0-360 longitude system of the gridded data.
    polygon_field.unwrap()
    polygon = polygon_field.geom.get_value()[0]
    field_nc = rd_nc.get()
    sub_field_nc = field_nc.get_field_slice({'time': slice(0, 10)})
    self.assertEqual(sub_field_nc['tas']._dimensions, field_nc['tas']._dimensions)
    sub = sub_field_nc.grid.get_intersects(polygon)
    # When split across two processes, there are floating point summing differences.
    desired = {1: 2734.5195, 2: 2740.4014}
    with vm.scoped_by_emptyable('grid intersects', sub):
        if not vm.is_null:
            abstraction_geometry = sub.get_abstraction_geometry()
            sub.parent.add_variable(abstraction_geometry, force=True)
            unioned = abstraction_geometry.get_unioned(spatial_average='tas')
            if unioned is not None:
                tas = unioned.parent['tas']
                self.assertFalse(tas.is_empty)
                self.assertAlmostEqual(tas.get_value().sum(), desired[vm.size], places=4)
def test_system_get_field_from_file(self):
    """Test returning a distributed field from file."""
    field = self.get_field(nrow=5, ncol=7)
    if MPI_RANK == 0:
        path = self.get_temporary_file_path('data.nc')
    else:
        path = None
    path = MPI_COMM.bcast(path)
    # Write the reference field from a single rank.
    with vm.scoped('write test field', [0]):
        if MPI_RANK == 0:
            field.write(path)
    MPI_COMM.Barrier()
    rd = RequestDataset(path)
    out_field = rd.get()
    if MPI_SIZE == 8:
        self.assertEqual(vm.size, 8)
    if MPI_RANK == 0:
        path2 = self.get_temporary_file_path('out_field.nc')
    else:
        path2 = None
    path2 = MPI_COMM.bcast(path2)
    # Collectively write the distributed field back out.
    with vm.scoped_by_emptyable('out_field write', out_field):
        if not vm.is_null:
            out_field.write(path2)
    MPI_COMM.Barrier()
    with vm.scoped('get actual', [0]):
        if MPI_RANK == 0:
            actual = RequestDataset(path2).get()
            actual = actual.data_variables[0].get_value().sum()
        else:
            actual = None
    actual = MPI_COMM.bcast(actual)
    # The round-tripped sum must match the original field's sum.
    desired = field.data_variables[0].get_value().sum()
    self.assertAlmostEqual(actual, desired)
def test_system_get_field_from_file(self):
    """Test returning a distributed field from file.

    NOTE(review): this definition is byte-identical to the preceding
    ``test_system_get_field_from_file`` and shadows it at class-definition
    time, so only one copy ever runs — confirm whether the duplicate is
    intentional and remove one if not.
    """
    field = self.get_field(nrow=5, ncol=7)
    if MPI_RANK == 0:
        path = self.get_temporary_file_path('data.nc')
    else:
        path = None
    path = MPI_COMM.bcast(path)

    # Serial write of the source field on rank 0 only.
    with vm.scoped('write test field', [0]):
        if MPI_RANK == 0:
            field.write(path)
    MPI_COMM.Barrier()

    rd = RequestDataset(path)
    out_field = rd.get()
    if MPI_SIZE == 8:
        self.assertEqual(vm.size, 8)

    if MPI_RANK == 0:
        path2 = self.get_temporary_file_path('out_field.nc')
    else:
        path2 = None
    path2 = MPI_COMM.bcast(path2)

    # Collective write of the distributed field; empty ranks are excluded.
    with vm.scoped_by_emptyable('out_field write', out_field):
        if not vm.is_null:
            out_field.write(path2)
    MPI_COMM.Barrier()

    # Read the result back serially and compare sums across all ranks.
    with vm.scoped('get actual', [0]):
        if MPI_RANK == 0:
            actual = RequestDataset(path2).get()
            actual = actual.data_variables[0].get_value().sum()
        else:
            actual = None
    actual = MPI_COMM.bcast(actual)
    desired = field.data_variables[0].get_value().sum()
    self.assertAlmostEqual(actual, desired)
def test_extent_global(self):
    """Global extent spans the whole sphere for global grids; empty ranks yield ``None``."""
    whole_sphere = (-180.0, -90.0, 180.0, 90.0)

    # Default-resolution global grid: extent is the full sphere on every rank.
    grid = self.get_gridxy_global()
    self.assertEqual(grid.extent_global, whole_sphere)

    # Coarse grid may be empty on some ranks; those ranks expect None.
    grid = self.get_gridxy_global(resolution=45.0)
    expected = None if grid.is_empty else whole_sphere
    computed = None
    with vm.scoped_by_emptyable('grid extent', grid):
        if not vm.is_null:
            computed = grid.extent_global
    self.assertEqual(computed, expected)
def test_system_parallel_write_ndvariable(self):
    """Test a parallel CSV write with a n-dimensional variable."""
    ompi = OcgDist()
    ompi.create_dimension('time', 3)
    ompi.create_dimension('extra', 2)
    ompi.create_dimension('x', 4)
    ompi.create_dimension('y', 7, dist=True)
    ompi.update_dimension_bounds()

    # Build the full collection on rank 0 only; it is scattered below.
    if MPI_RANK == 0:
        path = self.get_temporary_file_path('foo.csv')

        t = TemporalVariable(name='time', value=[1, 2, 3], dtype=float, dimensions='time')
        t.set_extrapolated_bounds('the_time_bounds', 'bounds')

        extra = Variable(name='extra', value=[7, 8], dimensions='extra')

        x = Variable(name='x', value=[9, 10, 11, 12], dimensions='x', dtype=float)
        x.set_extrapolated_bounds('x_bounds', 'bounds')

        # This will have the distributed dimension.
        y = Variable(name='y', value=[13, 14, 15, 16, 17, 18, 19], dimensions='y', dtype=float)
        y.set_extrapolated_bounds('y_bounds', 'bounds')

        data = Variable(name='data', value=np.random.rand(3, 2, 7, 4), dimensions=['time', 'extra', 'y', 'x'])

        vc = VariableCollection(variables=[t, extra, x, y, data])
    else:
        path, vc = [None] * 2
    path = MPI_COMM.bcast(path)
    vc = variable_collection_scatter(vc, ompi)

    # Collective CSV write; 'data' drives the iteration with coordinate followers.
    with vm.scoped_by_emptyable('write', vc):
        if not vm.is_null:
            vc.write(path, iter_kwargs={'variable': 'data', 'followers': ['time', 'extra', 'y', 'x']},
                     driver=DriverCSV)

    # 3 * 2 * 7 * 4 data rows plus the header line = 169 lines.
    if MPI_RANK == 0:
        desired = 169
        with open(path, 'r') as f:
            lines = f.readlines()
        self.assertEqual(len(lines), desired)
def test_get_unioned_spatial_average_parallel(self):
    """Union distributed geometries with a spatial average of an attached data variable.

    Requires exactly eight ranks; rank-dependent buffer radii and data values
    exercise the area-weighted average across the distributed geometry dimension.
    """
    if MPI_SIZE != 8:
        raise SkipTest('MPI_SIZE != 8')

    dist = OcgDist()
    geom_count = dist.create_dimension('geom_count', size=8, dist=True)
    time_count = dist.create_dimension('time', size=3)
    dist.update_dimension_bounds()

    if not geom_count.is_empty:
        # Each rank contributes buffered points whose radius depends on its rank.
        gvar = GeometryVariable(value=[Point(1.0, 1.0).buffer(MPI_RANK + 2)] * len(geom_count),
                                dimensions=geom_count)
        value = np.zeros((len(time_count), len(geom_count)), dtype=float)
        for ii in range(value.shape[0]):
            value[ii] = [MPI_RANK + 1 + ii + 1] * len(geom_count)
        data = Variable(name='data', value=value, dtype=float, dimensions=[time_count, geom_count])
    else:
        # Empty ranks still need matching (empty) variables for the collective union.
        gvar = GeometryVariable(dimensions=geom_count)
        data = Variable(name='data', dimensions=[time_count, geom_count], dtype=float)
    gvar.parent.add_variable(data)

    # Emptiness must be consistent across the geometry, data, and parent collection.
    self.assertTrue(gvar.is_empty == data.is_empty == gvar.parent.is_empty)

    with vm.scoped_by_emptyable('union', gvar):
        if vm.is_null:
            unioned = None
        else:
            unioned = gvar.get_unioned(spatial_average='data')

    # Only the rank holding the unioned result has a non-None return.
    if unioned is not None:
        self.assertIsInstance(unioned, GeometryVariable)
        actual = unioned.parent[data.name]
        self.assertAlmostEqual(actual.get_value().max(), 5.5466666666666677)
    else:
        self.assertIsNone(unioned)
def test_system_parallel_write(self):
    """A distributed CSV write must reproduce the template CSV exactly."""
    # Rank 0 chooses the paths; everyone else receives them via broadcast.
    if MPI_RANK == 0:
        in_path = self.get_path_to_template_csv()
        out_path = self.get_temporary_file_path('foo_out.csv')
    else:
        in_path = None
        out_path = None
    in_path = MPI_COMM.bcast(in_path)
    out_path = MPI_COMM.bcast(out_path)

    request = RequestDataset(in_path)
    # Force the first dimension in the metadata to be distributed.
    first_dimension = list(request.metadata['dimensions'].values())[0]
    first_dimension['dist'] = True
    field = request.get_field()

    with vm.scoped_by_emptyable('vc.write', field):
        if not vm.is_null:
            field.write(out_path, driver=DriverCSV)

    if MPI_RANK == 0:
        self.assertCSVFilesEqual(in_path, out_path)
def create_esmf_grid_fromfile(filename, grid, esmf_kwargs):
    """
    Create an ESMF grid or mesh object from file.

    This call is collective across the VM and must be called by each rank. The underlying call to ESMF must be using
    the global VM.

    :param str filename: Path to the source file.
    :param grid: The ocgis grid whose driver determines the ESMF file format and target class.
    :param dict esmf_kwargs: Keyword arguments destined for ESMF; only ``'regrid_method'`` is inspected here.
    :return: An ``ESMF.Grid`` or mesh-class instance created from ``filename``.
    """
    from ocgis import vm

    filetype = grid.driver.get_esmf_fileformat()
    klass = grid.driver.get_esmf_grid_class()

    if klass == ESMF.Grid:
        # Corners are only needed for conservative regridding.
        if esmf_kwargs.get('regrid_method') == ESMF.RegridMethod.BILINEAR:
            add_corner_stagger = False
        else:
            add_corner_stagger = True

        # If there is a spatial mask, pass this information to grid creation.
        root = vm.get_live_ranks_from_object(grid)[0]
        with vm.scoped_by_emptyable('masked values', grid):
            if not vm.is_null:
                if grid.has_masked_values_global:
                    add_mask = True
                    varname = grid.mask_variable.name
                else:
                    add_mask = False
                    varname = None
            else:
                varname, add_mask = [None] * 2
        # Share the mask decision with ranks that were empty inside the scoped VM.
        varname = vm.bcast(varname, root=root)
        add_mask = vm.bcast(add_mask, root=root)

        ret = klass(filename=filename, filetype=filetype, add_corner_stagger=add_corner_stagger, is_sphere=False,
                    add_mask=add_mask, varname=varname)
    else:
        # Unstructured/mesh path: the mesh name comes from the dimension map's attribute host.
        meshname = str(grid.dimension_map.get_variable(DMK.ATTRIBUTE_HOST))
        ret = klass(filename=filename, filetype=filetype, meshname=meshname)
    return ret
def test_write_variable_collection(self):
    """Round-trip a raw variable collection through a distributed netCDF write."""
    if MPI_RANK == 0:
        source_path = self.get_temporary_file_path('foo.nc')
        target_path = self.get_temporary_file_path('foo_out.nc')
        # Create a small source file holding one attributed variable.
        with self.nc_scope(source_path, 'w') as ds:
            ds.createDimension('seven', 7)
            ncvar = ds.createVariable('var_seven', float, dimensions=('seven',))
            ncvar[:] = np.arange(7, dtype=float) + 10
            ncvar.foo = 'bar'
    else:
        source_path = None
        target_path = None
    source_path = vm.bcast(source_path)
    target_path = vm.bcast(target_path)

    request = RequestDataset(source_path)
    # Distribute the "seven" dimension across ranks.
    request.metadata['dimensions']['seven']['dist'] = True
    collection = DriverNetcdf(request).create_raw_field()

    with vm.scoped_by_emptyable('write', collection):
        if not vm.is_null:
            collection.write(target_path)

    if MPI_RANK == 0:
        self.assertNcEqual(source_path, target_path)
def test_variable_gather_bounded_source_index(self):
    """Gathering a scattered variable restores the bounded source index on the root rank."""
    self.add_barrier = False
    dist = OcgDist()
    dist.create_dimension('long', 7, dist=True, src_idx='auto')
    dist.create_dimension('short', 3, src_idx='auto')
    dist.update_dimension_bounds()

    if vm.rank == 0:
        var = Variable(name='test', value=np.arange(21).reshape(7, 3), dimensions=['long', 'short'])
    else:
        var = None
    svar = variable_scatter(var, dist)

    with vm.scoped_by_emptyable('gather test', svar):
        if vm.is_null:
            return
        gvar = variable_gather(svar)

        if vm.rank == 0:
            # The gathered dimension carries the full bounded source index.
            actual = gvar.dimensions[0]._src_idx
            desired = (0, 7)
            self.assertEqual(actual, desired)
        else:
            # Non-root ranks receive no gathered variable.
            self.assertIsNone(gvar)
def test_get_intersects_parallel(self):
    """Exercise grid intersects subsetting in parallel across keyword combinations and from file.

    Each keyword combination runs inside its own emptyable subcommunicator which
    must be freed and the communicator reset before the next iteration.
    """
    if sys.version_info.major == 3 and sys.version_info.minor == 5:
        raise SkipTest('undefined behavior with Python 3.5')

    grid = self.get_gridxy()
    live_ranks = vm.get_live_ranks_from_object(grid)

    # Test with an empty subset.
    subset_geom = box(1000., 1000., 1100., 1100.)
    with vm.scoped('empty subset', live_ranks):
        if not vm.is_null:
            with self.assertRaises(EmptySubsetError):
                grid.get_intersects(subset_geom)

    # Test combinations.
    subset_geom = box(101.5, 40.5, 102.5, 42.)
    keywords = dict(is_vectorized=[True, False], has_bounds=[False, True], use_bounds=[False, True],
                    keep_touches=[True, False])
    for ctr, k in enumerate(self.iter_product_keywords(keywords)):
        grid = self.get_gridxy()

        # Scope the VM to non-empty ranks for this combination.
        vm_name, _ = vm.create_subcomm_by_emptyable('grid testing', grid, is_current=True)
        if vm.is_null:
            vm.free_subcomm(name=vm_name)
            vm.set_comm()
            continue

        if k.has_bounds:
            grid.set_extrapolated_bounds('xbounds', 'ybounds', 'bounds')
            self.assertTrue(grid.has_bounds)

        # Cannot use bounds with a point grid abstraction.
        if k.use_bounds and grid.abstraction == 'point':
            vm.free_subcomm(name=vm_name)
            vm.set_comm()
            continue

        grid_sub, slc = grid.get_intersects(subset_geom, keep_touches=k.keep_touches, use_bounds=k.use_bounds,
                                            return_slice=True)

        if k.has_bounds:
            self.assertTrue(grid.has_bounds)

        # Test geometries are filled appropriately after allocation.
        if not grid_sub.is_empty:
            for t in grid_sub.get_abstraction_geometry().get_value().flat:
                self.assertIsInstance(t, BaseGeometry)
        self.assertIsInstance(grid_sub, Grid)

        # Bounds and touch handling change the size of the returned slice.
        if k.keep_touches:
            if k.has_bounds and k.use_bounds:
                desired = (slice(0, 3, None), slice(0, 3, None))
            else:
                desired = (slice(1, 3, None), slice(1, 2, None))
        else:
            if k.has_bounds and k.use_bounds:
                desired = (slice(1, 3, None), slice(1, 2, None))
            else:
                desired = (slice(1, 2, None), slice(1, 2, None))
        if not grid.is_empty:
            self.assertEqual(grid.has_bounds, k.has_bounds)
            self.assertTrue(grid.is_vectorized)
        self.assertEqual(slc, desired)

        vm.free_subcomm(name=vm_name)
        vm.set_comm()

    # Test against a file. #########################################################################################

    subset_geom = box(101.5, 40.5, 102.5, 42.)

    if MPI_RANK == 0:
        path_grid = self.get_temporary_file_path('grid.nc')
    else:
        path_grid = None
    path_grid = MPI_COMM.bcast(path_grid)

    grid_to_write = self.get_gridxy()
    with vm.scoped_by_emptyable('write', grid_to_write):
        if not vm.is_null:
            field = Field(grid=grid_to_write)
            field.write(path_grid, driver=DriverNetcdfCF)
    MPI_COMM.Barrier()

    rd = RequestDataset(uri=path_grid)
    x = SourcedVariable(name='x', request_dataset=rd)
    self.assertIsNone(x._value)
    y = SourcedVariable(name='y', request_dataset=rd)
    self.assertIsNone(x._value)
    self.assertIsNone(y._value)

    grid = Grid(x, y)

    # Coordinate values should remain unloaded until the subset forces a read.
    for target in [grid._y_name, grid._x_name]:
        self.assertIsNone(grid.parent[target]._value)
    self.assertTrue(grid.is_vectorized)

    with vm.scoped_by_emptyable('intersects', grid):
        if not vm.is_null:
            sub, slc = grid.get_intersects(subset_geom, return_slice=True)

            self.assertEqual(slc, (slice(1, 3, None), slice(1, 2, None)))
            self.assertIsInstance(sub, Grid)

    # The file may be deleted before other ranks open.
    MPI_COMM.Barrier()
def global_grid_shape(grid):
    """Return the global shape of ``grid``.

    Collective across the current VM: ranks where the grid is empty fall
    outside the scoped subcommunicator and return ``None``.
    """
    shape = None
    with vm.scoped_by_emptyable('global grid shape', grid):
        if not vm.is_null:
            shape = grid.shape_global
    return shape
def write_subsets(self):
    """
    Write grid subsets to netCDF files using the provided filename templates.
    """
    src_filenames = []
    dst_filenames = []
    wgt_filenames = []
    dst_slices = []
    src_slices = []

    index_path = self.create_full_path_from_template('index_file')

    # nzeros = len(str(reduce(lambda x, y: x * y, self.nsplits_dst)))

    ctr = 1
    for sub_src, src_slc, sub_dst, dst_slc in self.iter_src_grid_subsets(yield_dst=True):
        # if vm.rank == 0:
        #     vm.rank_print('write_subset iterator count :: {}'.format(ctr))
        #     tstart = time.time()
        # padded = create_zero_padded_integer(ctr, nzeros)

        src_path = self.create_full_path_from_template('src_template', index=ctr)
        dst_path = self.create_full_path_from_template('dst_template', index=ctr)
        wgt_path = self.create_full_path_from_template('wgt_template', index=ctr)

        src_filenames.append(os.path.split(src_path)[1])
        dst_filenames.append(os.path.split(dst_path)[1])
        wgt_filenames.append(wgt_path)
        dst_slices.append(dst_slc)
        src_slices.append(src_slc)

        # Only write destinations if an iterator is not provided.
        if self.iter_dst is None:
            zip_args = [[sub_src, sub_dst], [src_path, dst_path]]
        else:
            zip_args = [[sub_src], [src_path]]

        for target, path in zip(*zip_args):
            with vm.scoped_by_emptyable('field.write', target):
                if not vm.is_null:
                    ocgis_lh(msg='writing: {}'.format(path), level=logging.DEBUG)
                    field = Field(grid=target)
                    field.write(path)
                    ocgis_lh(msg='finished writing: {}'.format(path), level=logging.DEBUG)

        # Increment the counter outside of the loop to avoid counting empty subsets.
        ctr += 1

        # if vm.rank == 0:
        #     tstop = time.time()
        #     vm.rank_print('timing::write_subset iteration::{}'.format(tstop - tstart))

    # Global shapes require a VM global scope to collect.
    src_global_shape = global_grid_shape(self.src_grid)
    dst_global_shape = global_grid_shape(self.dst_grid)

    # Gather and collapse source slices as some may be empty and we write on rank 0.
    gathered_src_grid_slice = vm.gather(src_slices)
    if vm.rank == 0:
        len_src_slices = len(src_slices)
        new_src_grid_slice = [None] * len_src_slices
        # For each subset index, take the first non-None slice found across ranks.
        for idx in range(len_src_slices):
            for rank_src_grid_slice in gathered_src_grid_slice:
                if rank_src_grid_slice[idx] is not None:
                    new_src_grid_slice[idx] = rank_src_grid_slice[idx]
                    break
        src_slices = new_src_grid_slice

    # The index file is written serially on rank 0.
    with vm.scoped('index write', [0]):
        if not vm.is_null:
            dim = Dimension('nfiles', len(src_filenames))
            vname = ['source_filename', 'destination_filename', 'weights_filename']
            values = [src_filenames, dst_filenames, wgt_filenames]
            grid_splitter_destination = GridSplitterConstants.IndexFile.NAME_DESTINATION_VARIABLE
            attrs = [{'esmf_role': 'grid_splitter_source'},
                     {'esmf_role': grid_splitter_destination},
                     {'esmf_role': 'grid_splitter_weights'}]

            vc = VariableCollection()

            # The index variable's attributes name the other variables in the file.
            grid_splitter_index = GridSplitterConstants.IndexFile.NAME_INDEX_VARIABLE
            vidx = Variable(name=grid_splitter_index)
            vidx.attrs['esmf_role'] = grid_splitter_index
            vidx.attrs['grid_splitter_source'] = 'source_filename'
            vidx.attrs[GridSplitterConstants.IndexFile.NAME_DESTINATION_VARIABLE] = 'destination_filename'
            vidx.attrs['grid_splitter_weights'] = 'weights_filename'
            vidx.attrs[GridSplitterConstants.IndexFile.NAME_SRC_GRID_SHAPE] = src_global_shape
            vidx.attrs[GridSplitterConstants.IndexFile.NAME_DST_GRID_SHAPE] = dst_global_shape

            vc.add_variable(vidx)

            for idx in range(len(vname)):
                v = Variable(name=vname[idx], dimensions=dim, dtype=str, value=values[idx], attrs=attrs[idx])
                vc.add_variable(v)

            bounds_dimension = Dimension(name='bounds', size=2)
            # TODO: This needs to work with four dimensions.
            # Source -----------------------------------------------------------------------------------------------
            self.src_grid._gs_create_index_bounds_(RegriddingRole.SOURCE, vidx, vc, src_slices, dim,
                                                   bounds_dimension)
            # Destination ------------------------------------------------------------------------------------------
            self.dst_grid._gs_create_index_bounds_(RegriddingRole.DESTINATION, vidx, vc, dst_slices, dim,
                                                   bounds_dimension)

            vc.write(index_path)

    vm.barrier()
# Script fragment: `grid` and `OUTDIR` are defined earlier in the file.
# Write the exact destination field, then iterate destination grid chunks,
# broadcasting each chunk's bounding box from a live rank to all ranks.
field = create_exact_field(grid, 'exact', ntime=3, fill_data_var=False, crs=ocgis.crs.Spherical())
field.write(os.path.join(OUTDIR, 'dst_field_1km.nc'))

gs = GridChunker(grid, grid, (10, 10))

ctr = 1
for grid_sub in gs.iter_dst_grid_subsets():
    # NOTE(review): `ctr` is never incremented in the visible loop body, so every
    # iteration targets the same subset filenames — confirm against the full script.
    subset_filename = os.path.join(OUTDIR, 'src_subset_{}.nc'.format(ctr))
    dst_subset_filename = os.path.join(OUTDIR, 'dst_subset_{}.nc'.format(ctr))

    if vm.rank == 0:
        # Fixed: was a Python 2 print statement, a syntax error under Python 3.
        print('creating subset:', subset_filename)

    # Compute the chunk's global extent on live ranks only; empty ranks see None.
    with vm.scoped_by_emptyable('grid subset', grid_sub):
        if not vm.is_null:
            extent_global = grid_sub.extent_global
            if vm.rank == 0:
                # NOTE(review): `root` is assigned but never used in this fragment — verify intent.
                root = vm.rank_global
        else:
            extent_global = None

    # Broadcast the extent from the first live rank so every rank has the bbox.
    live_ranks = vm.get_live_ranks_from_object(grid_sub)
    bbox = vm.bcast(extent_global, root=live_ranks[0])

    vm.barrier()
    if vm.rank == 0:
        # Fixed: was a Python 2 print statement, a syntax error under Python 3.
        print('starting bbox subset:', bbox)
    vm.barrier()
def get_spatial_subset(self, operation, geom, use_spatial_index=env.USE_SPATIAL_INDEX, buffer_value=None,
                       buffer_crs=None, geom_crs=None, select_nearest=False, optimized_bbox_subset=False):
    """
    Perform a spatial subset operation on ``target``.

    :param str operation: Either ``'intersects'`` or ``'clip'``.
    :param geom: The input geometry object to use for subsetting of ``target``.
    :type geom: :class:`shapely.geometry.base.BaseGeometry` | :class:`ocgis.GeometryVariable`
    :param bool use_spatial_index: If ``True``, use an ``rtree`` spatial index.
    :rtype: Same as ``target``. If ``target`` is a :class:`ocgis.RequestDataset`,
     then a :class:`ocgis.interface.base.field.Field` will be returned.
    :param float buffer_value: The buffer radius to use in units of the coordinate system of ``subset_sdim``.
    :param buffer_crs: If provided, then ``buffer_value`` are not in units of the coordinate system of
     ``subset_sdim`` but in units of ``buffer_crs``.
    :type buffer_crs: :class:`ocgis.interface.base.crs.CoordinateReferenceSystem`
    :param geom_crs: The coordinate reference system for the subset geometry.
    :type geom_crs: :class:`ocgis.crs.CRS`
    :param bool select_nearest: If ``True``, following the spatial subset operation, select the nearest geometry
     in the subset data to ``geom``. Centroid-based distance is used.
    :param bool optimized_bbox_subset: If ``True``, only do a bounding box subset and do not perform more complex
     GIS subset operations such as constructing a spatial index.
    :raises: ValueError
    """
    # Normalize the subset geometry to a single-element geometry variable.
    if not isinstance(geom, GeometryVariable):
        geom = GeometryVariable(value=geom, name='geom', dimensions='one', crs=geom_crs)
    if geom.get_value().flatten().shape != (1,):
        msg = 'Only one subset geometry allowed. The shape of the geometry variable is {}.'.format(geom.shape)
        raise ValueError(msg)

    # An optimized bounding-box subset only works for grid intersects.
    if optimized_bbox_subset:
        if self.field.grid is None:
            msg = 'Subset operation must be performed on a grid when "optimized_bbox_subset=True".'
            raise ValueError(msg)
        if operation != 'intersects':
            msg = 'Only "intersects" spatial operations when "optimized_bbox_subset=True".'
            raise ValueError(msg)

    # Buffer the subset if a buffer value is provided.
    if buffer_value is not None:
        geom = self._get_buffered_geometry_(geom, buffer_value, buffer_crs=buffer_crs)

    prepared = self._prepare_geometry_(geom)
    base_geometry = prepared.get_value().flatten()[0]

    # Prepare the target field.
    self._prepare_target_()

    # execute the spatial operation
    if operation == 'intersects':
        if self.field.grid is None:
            ret = self.field.geom.get_intersects(base_geometry, use_spatial_index=use_spatial_index,
                                                 cascade=True).parent
        else:
            ret = self.field.grid.get_intersects(base_geometry, cascade=True,
                                                 optimized_bbox_subset=optimized_bbox_subset).parent
    elif operation in ('clip', 'intersection'):
        if self.field.grid is None:
            ret = self.field.geom.get_intersection(base_geometry, use_spatial_index=use_spatial_index,
                                                   cascade=True).parent
        else:
            ret = self.field.grid.get_intersection(base_geometry, cascade=True)
            # An intersection with a grid returns a geometry variable. Set this on the field.
            ret.parent.set_geom(ret)
            ret = ret.parent
    else:
        msg = 'The spatial operation "{0}" is not supported.'.format(operation)
        raise ValueError(msg)

    # Post-processing only applies on ranks with a non-empty result.
    with vm.scoped_by_emptyable('return finalize', ret):
        if not vm.is_null:
            # Select the nearest geometry if requested.
            if select_nearest:
                ret.set_abstraction_geom()
                ret = ret.geom.get_nearest(base_geometry).parent

            # check for rotated pole and convert back to default CRS
            if self._original_rotated_pole_state is not None and self.output_crs == 'input':
                ret.update_crs(self._original_rotated_pole_state)

            # wrap the data...
            if self._get_should_wrap_(ret):
                ret.wrap()

            # convert the coordinate system if requested...
            if self.should_update_crs:
                ret.update_crs(self.output_crs)

    return ret
def test_get_intersects_ordering(self):
    """Test grid ordering/origins do not influence grid subsetting."""
    keywords = {KeywordArgument.OPTIMIZED_BBOX_SUBSET: [False, True],
                'should_wrap': [False, True],
                'reverse_x': [False, True],
                'reverse_y': [False, True],
                'should_expand': [False, True]}

    # A 0-360 longitude domain crossing the antimeridian when wrapped.
    x_value = np.array([155., 160., 165., 170., 175., 180., 185., 190., 195., 200., 205.])
    y_value = np.array([-20., -15., -10., -5., 0., 5., 10., 15., 20.])
    bbox = [168., -12., 191., 5.3]

    for k in self.iter_product_keywords(keywords, as_namedtuple=False):
        # Pop orientation controls; the remaining keywords pass through to get_intersects.
        reverse_x = k.pop('reverse_x')
        reverse_y = k.pop('reverse_y')
        should_expand = k.pop('should_expand')
        should_wrap = k.pop('should_wrap')

        ompi = OcgDist()
        ompi.create_dimension('dx', len(x_value), dist=True)
        ompi.create_dimension('dy', len(y_value))
        ompi.update_dimension_bounds()

        if reverse_x:
            new_x_value = x_value.copy()
            new_x_value = np.flipud(new_x_value)
        else:
            new_x_value = x_value
        if reverse_y:
            new_y_value = y_value.copy()
            new_y_value = np.flipud(new_y_value)
        else:
            new_y_value = y_value

        if MPI_RANK == 0:
            x = Variable('x', new_x_value, 'dx')
            y = Variable('y', new_y_value, 'dy')
        else:
            x, y = [None, None]
        x = variable_scatter(x, ompi)
        y = variable_scatter(y, ompi)

        grid = Grid(x, y, crs=Spherical())

        with vm.scoped_by_emptyable('scattered', grid):
            if not vm.is_null:
                if should_expand:
                    expand_grid(grid)
                if should_wrap:
                    # Wrapping moves the domain to -180/180, so the subset geometry
                    # must be split across the antimeridian.
                    grid = deepcopy(grid)
                    grid.wrap()
                    actual_bbox = MultiPolygon([box(-180, -12, -169, 5.3), box(168, -12, 180, 5.3)])
                else:
                    actual_bbox = box(*bbox)

                live_ranks = vm.get_live_ranks_from_object(grid)
                with vm.scoped('grid.get_intersects', live_ranks):
                    if not vm.is_null:
                        sub = grid.get_intersects(actual_bbox, **k)
                        with vm.scoped_by_emptyable('sub grid', sub):
                            if not vm.is_null:
                                if should_wrap:
                                    # Shift wrapped coordinates back to 0-360 before comparing extents.
                                    current_x_value = sub.x.get_value()
                                    current_x_value[sub.x.get_value() < 0] += 360
                                self.assertEqual(sub.extent_global, (170.0, -10.0, 190.0, 5.0))

                                # Expanding the grid breaks its vectorized representation.
                                if should_expand:
                                    desired = False
                                else:
                                    desired = True
                                self.assertEqual(grid.is_vectorized, desired)
                                self.assertEqual(sub.is_vectorized, desired)

                                self.assertFalse(grid.has_allocated_point)
                                self.assertFalse(grid.has_allocated_polygon)
def write_subsets(self, src_template, dst_template, wgt_template, index_path):
    """
    Write grid subsets to netCDF files using the provided filename templates. The template must contain the full
    file path with a single curly-bracer pair to insert the combination counter. ``wgt_template`` should not be a
    full path. This name is used when generating weight files.

    >>> template_example = '/path/to/data_{}.nc'

    :param str src_template: The template for the source subset file.
    :param str dst_template: The template for the destination subset file.
    :param str wgt_template: The template for the weight filename.

    >>> wgt_template = 'esmf_weights_{}.nc'

    :param index_path: Path to the output indexing netCDF.
    """
    src_filenames = []
    dst_filenames = []
    wgt_filenames = []
    dst_slices = []

    # nzeros = len(str(reduce(lambda x, y: x * y, self.nsplits_dst)))

    for ctr, (sub_src, sub_dst, dst_slc) in enumerate(self.iter_src_grid_subsets(yield_dst=True), start=1):
        # padded = create_zero_padded_integer(ctr, nzeros)

        src_path = src_template.format(ctr)
        dst_path = dst_template.format(ctr)
        wgt_filename = wgt_template.format(ctr)

        src_filenames.append(os.path.split(src_path)[1])
        dst_filenames.append(os.path.split(dst_path)[1])
        wgt_filenames.append(wgt_filename)
        dst_slices.append(dst_slc)

        for target, path in zip([sub_src, sub_dst], [src_path, dst_path]):
            # Empty subsets still construct an (empty) field so the write call is collective.
            if target.is_empty:
                is_empty = True
                target = None
            else:
                is_empty = False
            field = Field(grid=target, is_empty=is_empty)
            ocgis_lh(msg='writing: {}'.format(path), level=logging.DEBUG)
            with vm.scoped_by_emptyable('field.write', field):
                if not vm.is_null:
                    field.write(path)
            ocgis_lh(msg='finished writing: {}'.format(path), level=logging.DEBUG)

    # The index file is written serially on rank 0.
    with vm.scoped('index write', [0]):
        if not vm.is_null:
            dim = Dimension('nfiles', len(src_filenames))
            vname = ['source_filename', 'destination_filename', 'weights_filename']
            values = [src_filenames, dst_filenames, wgt_filenames]
            grid_splitter_destination = GridSplitterConstants.IndexFile.NAME_DESTINATION_VARIABLE
            attrs = [{'esmf_role': 'grid_splitter_source'},
                     {'esmf_role': grid_splitter_destination},
                     {'esmf_role': 'grid_splitter_weights'}]

            vc = VariableCollection()

            # The index variable's attributes name the other variables in the file.
            grid_splitter_index = GridSplitterConstants.IndexFile.NAME_INDEX_VARIABLE
            vidx = Variable(name=grid_splitter_index)
            vidx.attrs['esmf_role'] = grid_splitter_index
            vidx.attrs['grid_splitter_source'] = 'source_filename'
            vidx.attrs[GridSplitterConstants.IndexFile.NAME_DESTINATION_VARIABLE] = 'destination_filename'
            vidx.attrs['grid_splitter_weights'] = 'weights_filename'
            x_bounds = GridSplitterConstants.IndexFile.NAME_X_BOUNDS_VARIABLE
            vidx.attrs[x_bounds] = x_bounds
            y_bounds = GridSplitterConstants.IndexFile.NAME_Y_BOUNDS_VARIABLE
            vidx.attrs[y_bounds] = y_bounds

            vc.add_variable(vidx)

            for idx in range(len(vname)):
                v = Variable(name=vname[idx], dimensions=dim, dtype=str, value=values[idx], attrs=attrs[idx])
                vc.add_variable(v)

            bounds_dimension = Dimension(name='bounds', size=2)
            xb = Variable(name=x_bounds, dimensions=[dim, bounds_dimension], attrs={'esmf_role': 'x_split_bounds'},
                          dtype=int)
            yb = Variable(name=y_bounds, dimensions=[dim, bounds_dimension], attrs={'esmf_role': 'y_split_bounds'},
                          dtype=int)

            # Record each destination slice's start/stop along the grid's x and y dimensions.
            x_name = self.dst_grid.x.dimensions[0].name
            y_name = self.dst_grid.y.dimensions[0].name
            for idx, slc in enumerate(dst_slices):
                xb.get_value()[idx, :] = slc[x_name].start, slc[x_name].stop
                yb.get_value()[idx, :] = slc[y_name].start, slc[y_name].stop
            vc.add_variable(xb)
            vc.add_variable(yb)

            vc.write(index_path)

    vm.barrier()
def iter_src_grid_subsets(self, yield_dst=False):
    """
    Yield source grid subsets using the extent of its associated destination grid subset.

    :param bool yield_dst: If ``True``, yield the destination subset as well as the source grid subset.
    :return: The source grid if ``yield_dst`` is ``False``, otherwise a three-element tuple in the form
     ``(<source grid subset>, <destination grid subset>, <destination grid slice>)``.
    :rtype: :class:`ocgis.Grid` or (:class:`ocgis.Grid`, :class:`ocgis.Grid`, dict)
    :raises ValueError: If ``allow_masked`` is ``False`` and the source grid contains masked values, or the
     containment check fails when ``check_contains`` is ``True``.
    """
    if yield_dst:
        yield_slice = True
    else:
        yield_slice = False

    # Buffer by twice the coarser of the two grid resolutions so the source subset
    # fully covers the destination subset.
    dst_grid_resolution = self.dst_grid.resolution
    src_grid_resolution = self.src_grid.resolution
    if dst_grid_resolution <= src_grid_resolution:
        target_resolution = dst_grid_resolution
    else:
        target_resolution = src_grid_resolution
    buffer_value = 2 * target_resolution

    for yld in self.iter_dst_grid_subsets(yield_slice=yield_slice):
        if yield_slice:
            dst_grid_subset, dst_slice = yld
        else:
            dst_grid_subset = yld

        dst_box = None
        with vm.scoped_by_emptyable('extent_global', dst_grid_subset):
            if not vm.is_null:
                if self.check_contains:
                    dst_box = box(*dst_grid_subset.extent_global)
                # Use the envelope! A buffer returns "fancy" borders. We just want to expand the bounding box.
                sub_box = box(*dst_grid_subset.extent_global).buffer(buffer_value).envelope
                ocgis_lh(msg=str(sub_box.bounds), level=logging.DEBUG)
            else:
                sub_box, dst_box = [None, None]

        # Share the subset box with ranks that had an empty destination subset.
        live_ranks = vm.get_live_ranks_from_object(dst_grid_subset)
        sub_box = vm.bcast(sub_box, root=live_ranks[0])
        if self.check_contains:
            dst_box = vm.bcast(dst_box, root=live_ranks[0])

        src_grid_subset = self.src_grid.get_intersects(sub_box, keep_touches=False, cascade=False,
                                                       optimized_bbox_subset=True)

        # Fixed: the original duplicated this guard in two consecutive identical
        # `if not self.allow_masked:` blocks; they are merged into one.
        if not self.allow_masked:
            gmask = self.src_grid.get_mask()
            if gmask is not None and gmask.any():
                raise ValueError('Masked values in source grid subset.')

        with vm.scoped_by_emptyable('src_grid_subset', src_grid_subset):
            if not vm.is_null:
                if self.check_contains:
                    src_box = box(*src_grid_subset.extent_global)
                    if not does_contain(src_box, dst_box):
                        raise ValueError('Contains check failed.')
            else:
                # Ranks with no intersection yield a placeholder empty grid.
                src_grid_subset = Grid(Variable('x', is_empty=True), Variable('y', is_empty=True))

        if yield_dst:
            yld = (src_grid_subset, dst_grid_subset, dst_slice)
        else:
            yld = src_grid_subset
        yield yld
def get_spatial_subset(self, operation, geom, use_spatial_index=env.USE_SPATIAL_INDEX, buffer_value=None,
                       buffer_crs=None, geom_crs=None, select_nearest=False, optimized_bbox_subset=False):
    """
    Perform a spatial subset operation on ``target``.

    :param str operation: Either ``'intersects'`` or ``'clip'``.
    :param geom: The input geometry object to use for subsetting of ``target``.
    :type geom: :class:`shapely.geometry.base.BaseGeometry`
    :param bool use_spatial_index: If ``True``, use an ``rtree`` spatial index.
    :rtype: Same as ``target``. If ``target`` is a :class:`ocgis.RequestDataset`,
     then a :class:`ocgis.interface.base.field.Field` will be returned.
    :param float buffer_value: The buffer radius to use in units of the coordinate system of ``subset_sdim``.
    :param buffer_crs: If provided, then ``buffer_value`` are not in units of the coordinate system of
     ``subset_sdim`` but in units of ``buffer_crs``.
    :type buffer_crs: :class:`ocgis.interface.base.crs.CoordinateReferenceSystem`
    :param geom_crs: The coordinate reference system for the subset geometry.
    :param bool select_nearest: If ``True``, following the spatial subset operation, select the nearest geometry
     in the subset data to ``geom``. Centroid-based distance is used.
    :param bool optimized_bbox_subset: If ``True``, only do a bounding box subset and do not perform more complex
     GIS subset operations such as constructing a spatial index.
    :raises: ValueError
    """
    # Normalize the subset geometry to a single-element geometry variable.
    if not isinstance(geom, GeometryVariable):
        geom = GeometryVariable(value=geom, name='geom', dimensions='one', crs=geom_crs)
    if geom.get_value().flatten().shape != (1,):
        msg = 'Only one subset geometry allowed. The shape of the geometry variable is {}.'.format(geom.shape)
        raise ValueError(msg)

    # An optimized bounding-box subset only works for grid intersects.
    if optimized_bbox_subset:
        if self.field.grid is None:
            msg = 'Subset operation must be performed on a grid when "optimized_bbox_subset=True".'
            raise ValueError(msg)
        if operation != 'intersects':
            msg = 'Only "intersects" spatial operations when "optimized_bbox_subset=True".'
            raise ValueError(msg)

    # Buffer the subset if a buffer value is provided.
    if buffer_value is not None:
        geom = self._get_buffered_geometry_(geom, buffer_value, buffer_crs=buffer_crs)

    # NOTE(review): unlike the other overload of this method in this file, the target
    # is prepared BEFORE the geometry here — confirm which ordering is intended.
    self._prepare_target_()
    prepared = self._prepare_geometry_(geom)
    base_geometry = prepared.get_value().flatten()[0]

    # execute the spatial operation
    if operation == 'intersects':
        if self.field.grid is None:
            ret = self.field.geom.get_intersects(base_geometry, use_spatial_index=use_spatial_index,
                                                 cascade=True).parent
        else:
            ret = self.field.grid.get_intersects(base_geometry, cascade=True,
                                                 optimized_bbox_subset=optimized_bbox_subset).parent
    elif operation in ('clip', 'intersection'):
        if self.field.grid is None:
            ret = self.field.geom.get_intersection(base_geometry, use_spatial_index=use_spatial_index,
                                                   cascade=True).parent
        else:
            ret = self.field.grid.get_intersection(base_geometry, cascade=True)
            # An intersection with a grid returns a geometry variable. Set this on the field.
            ret.parent.set_geom(ret)
            ret = ret.parent
    else:
        msg = 'The spatial operation "{0}" is not supported.'.format(operation)
        raise ValueError(msg)

    # Post-processing only applies on ranks with a non-empty result.
    with vm.scoped_by_emptyable('return finalize', ret):
        if not vm.is_null:
            # Select the nearest geometry if requested.
            if select_nearest:
                ret.set_abstraction_geom()
                ret = ret.geom.get_nearest(base_geometry).parent

            # check for rotated pole and convert back to default CRS
            if self._original_rotated_pole_state is not None and self.output_crs == 'input':
                ret.update_crs(self._original_rotated_pole_state)

            # wrap the data...
            if self._get_should_wrap_(ret):
                ret.wrap()

            # convert the coordinate system if requested...
            if self.should_update_crs:
                ret.update_crs(self.output_crs)

    return ret
def iter_src_grid_subsets(self, yield_dst=False):
    """
    Yield source grid subsets using the extent of its associated destination grid subset.

    The destination extent is buffered (by an explicit ``buffer_value`` or twice the coarser of the two grid
    resolutions) before being used as a bounding box for the source subset. Collective parallel (VM) operations
    are used throughout; all ranks must iterate this generator together.

    :param bool yield_dst: If ``True``, yield the destination subset as well as the source grid subset.
    :return: The source grid if ``yield_dst`` is ``False``, otherwise a three-element tuple in the form
     ``(<source grid subset>, <destination grid subset>, <destination grid slice>)``.
    :rtype: :class:`ocgis.Grid` or (:class:`ocgis.Grid`, :class:`ocgis.Grid`, dict)
    """
    # Destination slices are only needed when the destination subset itself is yielded.
    if yield_dst:
        yield_slice = True
    else:
        yield_slice = False

    if self.buffer_value is None:
        try:
            # Resolve each grid's resolution, preferring any explicit override on this object.
            if self.dst_grid_resolution is None:
                dst_grid_resolution = self.dst_grid.resolution
            else:
                dst_grid_resolution = self.dst_grid_resolution
            if self.src_grid_resolution is None:
                src_grid_resolution = self.src_grid.resolution
            else:
                src_grid_resolution = self.src_grid_resolution

            # Buffer using twice the coarser (larger) of the two resolutions... wait: this selects the
            # smaller value when dst <= src. NOTE(review): selects the minimum resolution; confirm intent.
            if dst_grid_resolution <= src_grid_resolution:
                target_resolution = dst_grid_resolution
            else:
                target_resolution = src_grid_resolution
            buffer_value = 2. * target_resolution
        except NotImplementedError:
            # Unstructured grids do not have an associated resolution.
            if isinstance(self.src_grid, GridUnstruct) or isinstance(self.dst_grid, GridUnstruct):
                buffer_value = None
            else:
                raise
    else:
        buffer_value = self.buffer_value

    # Capture destination wrapping/CRS once; reused for every subset bounding box below.
    dst_grid_wrapped_state = self.dst_grid.wrapped_state
    dst_grid_crs = self.dst_grid.crs

    # Use a destination grid iterator if provided.
    if self.iter_dst is not None:
        iter_dst = self.iter_dst(self, yield_slice=yield_slice)
    else:
        iter_dst = self.iter_dst_grid_subsets(yield_slice=yield_slice)

    # Loop over each destination grid subset.
    for yld in iter_dst:
        if yield_slice:
            dst_grid_subset, dst_slice = yld
        else:
            dst_grid_subset = yld

        dst_box = None
        # Compute the destination bounding box only on ranks where the destination subset is not empty.
        with vm.scoped_by_emptyable('extent_global', dst_grid_subset):
            if not vm.is_null:
                if self.check_contains:
                    dst_box = box(*dst_grid_subset.extent_global)

                # Use the envelope! A buffer returns "fancy" borders. We just want to expand the bounding box.
                # A precomputed global extent stored on the parent attrs takes precedence when available.
                extent_global = dst_grid_subset.parent.attrs.get('extent_global')
                if extent_global is None:
                    extent_global = dst_grid_subset.extent_global
                sub_box = box(*extent_global)
                if buffer_value is not None:
                    sub_box = sub_box.buffer(buffer_value).envelope

                ocgis_lh(msg=str(sub_box.bounds), level=logging.DEBUG)
            else:
                sub_box, dst_box = [None, None]

        # Broadcast the boxes from a live rank so empty ranks also hold them.
        live_ranks = vm.get_live_ranks_from_object(dst_grid_subset)
        sub_box = vm.bcast(sub_box, root=live_ranks[0])

        if self.check_contains:
            dst_box = vm.bcast(dst_box, root=live_ranks[0])

        # Subset the source grid with the (buffered) destination bounding box.
        sub_box = GeometryVariable.from_shapely(sub_box, is_bbox=True, wrapped_state=dst_grid_wrapped_state, crs=dst_grid_crs)
        src_grid_subset, src_grid_slice = self.src_grid.get_intersects(sub_box, keep_touches=False, cascade=False, optimized_bbox_subset=self.optimized_bbox_subset, return_slice=True)

        # Reload the data using a new source index distribution.
        if hasattr(src_grid_subset, 'reduce_global'):
            # Only redistribute when requested and at least one rank holds live data.
            if self.redistribute and len(vm.get_live_ranks_from_object(src_grid_subset)) > 0:
                topology = src_grid_subset.abstractions_available[Topology.POLYGON]
                cindex = topology.cindex
                redist_dimname = self.src_grid.abstractions_available[Topology.POLYGON].element_dim.name
                # Empty ranks pass a null dimension so the collective redistribution still participates.
                if src_grid_subset.is_empty:
                    redist_dim = None
                else:
                    redist_dim = topology.element_dim
                redistribute_by_src_idx(cindex, redist_dimname, redist_dim)

        # Validate and finalize the source subset only on ranks where it is not empty.
        with vm.scoped_by_emptyable('src_grid_subset', src_grid_subset):
            if not vm.is_null:
                if not self.allow_masked:
                    gmask = src_grid_subset.get_mask()
                    if gmask is not None and gmask.any():
                        raise ValueError('Masked values in source grid subset.')

                if self.check_contains:
                    src_box = box(*src_grid_subset.extent_global)
                    if not does_contain(src_box, dst_box):
                        raise ValueError('Contains check failed.')

                # Try to reduce the coordinates in the case of unstructured grid data.
                if hasattr(src_grid_subset, 'reduce_global'):
                    src_grid_subset = src_grid_subset.reduce_global()
            else:
                # Empty ranks yield a placeholder collection in place of a grid subset.
                src_grid_subset = VariableCollection(is_empty=True)

        # Convert the positional slice into a dimension-name-keyed mapping (or None when empty).
        if src_grid_subset.is_empty:
            src_grid_slice = None
        else:
            src_grid_slice = {src_grid_subset.dimensions[ii].name: src_grid_slice[ii] for ii in range(src_grid_subset.ndim)}

        if yield_dst:
            yld = (src_grid_subset, src_grid_slice, dst_grid_subset, dst_slice)
        else:
            yld = src_grid_subset, src_grid_slice
        yield yld