def test_write_variable_collection_object_arrays(self):
    """Test writing variable length arrays in parallel.

    Rank 0 builds an object-typed variable and writes it directly to the
    "desired" file. The same variable is then scattered over a distributed
    dimension, written through a ``VariableCollection`` to the "actual" file,
    and the two files are compared on rank 0.
    """
    ragged = actual_path = desired_path = None
    with vm.scoped('write', [0]):
        if not vm.is_null:
            actual_path = self.get_temporary_file_path('in.nc')
            desired_path = self.get_temporary_file_path('out.nc')
            ragged = Variable(name='objects',
                              value=[[1, 3, 5], [7, 9], [11]],
                              fill_value=4,
                              dtype=ObjectType(int),
                              dimensions='values')
            ragged.write(desired_path)

    # Broadcasts are called by every rank, in the same order.
    actual_path = MPI_COMM.bcast(actual_path)
    desired_path = MPI_COMM.bcast(desired_path)

    dist = OcgDist()
    dist.create_dimension('values', 3, dist=True)
    dist.update_dimension_bounds()

    collection = VariableCollection(variables=[variable_scatter(ragged, dist)])
    with vm.scoped_by_emptyable('write', collection):
        if not vm.is_null:
            collection.write(actual_path)

    if MPI_RANK == 0:
        self.assertNcEqual(actual_path, desired_path)
def test_write_variable_collection(self):
    """Round-trip a small netCDF file through a distributed variable collection.

    Rank 0 creates the input file. Every rank then reads it with the 'seven'
    dimension flagged as distributed and takes part in writing the output
    file, which rank 0 compares against the input.
    """
    src_path = dst_path = None
    if MPI_RANK == 0:
        src_path = self.get_temporary_file_path('foo.nc')
        dst_path = self.get_temporary_file_path('foo_out.nc')
        with self.nc_scope(src_path, 'w') as dataset:
            dataset.createDimension('seven', 7)
            nc_var = dataset.createVariable('var_seven', float, dimensions=('seven',))
            nc_var[:] = np.arange(7, dtype=float) + 10
            nc_var.foo = 'bar'
    src_path = MPI_COMM.bcast(src_path)
    dst_path = MPI_COMM.bcast(dst_path)

    request = RequestDataset(src_path)
    # Mark the file's only dimension as distributed before building the driver.
    request.metadata['dimensions']['seven']['dist'] = True
    collection = DriverNetcdf(request).get_variable_collection()

    with vm.scoped_by_emptyable('write', collection):
        if not vm.is_null:
            collection.write(dst_path)

    if MPI_RANK == 0:
        self.assertNcEqual(src_path, dst_path)
def test_variable_scatter(self):
    """Scatter a masked variable with bounds and check each rank's local piece.

    Rank 0 owns the full variable. After ``variable_scatter`` ranks above 1
    are expected to hold an empty variable, while the remaining ranks compare
    their value, mask, bounds and source indices against the matching slice
    of the full arrays.
    """
    full_value = np.arange(5, dtype=float) + 50
    full_mask = np.array([True, True, False, True, False])

    dist = OcgDist()
    five = dist.create_dimension('five', 5, src_idx=np.arange(5), dist=True)
    dist.create_dimension('bounds', 2)
    dist.update_dimension_bounds()

    source = full_bounds = full_src_idx = None
    if MPI_RANK == 0:
        full_src_idx = Dimension('local', 5, src_idx=np.arange(5))._src_idx.copy()
        source = Variable('the_five', value=full_value, mask=full_mask, dimensions=five.name)
        source.set_extrapolated_bounds('the_five_bounds', 'bounds')
        full_bounds = source.bounds.get_value()

    scattered = variable_scatter(source, dist)
    full_bounds = MPI_COMM.bcast(full_bounds)
    full_src_idx = MPI_COMM.bcast(full_src_idx)

    if MPI_RANK > 1:
        self.assertIsNone(scattered.get_value())
        self.assertTrue(scattered.is_empty)
        return

    # Slice of the global arrays that this rank is expected to hold.
    local = slice(*dist.get_dimension('five').bounds_local)
    self.assertNumpyAll(full_value[local], scattered.get_value())
    self.assertNumpyAll(full_mask[local], scattered.get_mask())
    self.assertNumpyAll(full_bounds[local], scattered.bounds.get_value())
    self.assertNumpyAll(full_src_idx[local], scattered.dimensions[0]._src_idx)
    self.assertNumpyAll(full_src_idx[local], scattered.bounds.dimensions[0]._src_idx)
def test_system_spatial_averaging_through_operations(self):
    """Aggregate a gridded field through ``OcgOperations`` and check the mean.

    Rank 0 writes a 5x7 field to disk. All ranks then run an aggregating
    operation on it. Ranks that get no result must not be the root, and
    rank 0 compares the aggregated value with the plain mean of the data.
    """
    data_name = 'data'
    expected_values = path = None
    with vm.scoped('write', [0]):
        if not vm.is_null:
            x = Variable('x', range(5), 'x', float)
            y = Variable('y', range(7), 'y', float)
            grid = Grid(x, y)
            raw = np.arange(x.size * y.size).reshape(grid.shape)
            data = Variable(data_name, raw, grid.dimensions, float)
            # Use the value as stored on the variable (after the float cast).
            expected_values = data.get_value()
            field = Field(grid=grid, is_data=data)
            path = self.get_temporary_file_path('data.nc')
            field.write(path)
    expected_values = MPI_COMM.bcast(expected_values)
    path = MPI_COMM.bcast(path)

    request = RequestDataset(path, variable=data_name)
    ret = OcgOperations(dataset=request, aggregate=True).execute()

    if ret is not None:
        out_field = ret.get_element()
        if MPI_RANK == 0:
            desired = expected_values.mean()
            actual = out_field.data_variables[0].get_value()[0]
            self.assertEqual(actual, desired)
    else:
        self.assertNotEqual(vm.rank, vm.root)
def test_write_variable_collection_parallel(self):
    """Write a vector field to a path (and, serially, to an open file object).

    The field is written with ``DriverVector``, the record count is checked
    on rank 0, and the data is read back and compared variable by variable.
    The open-file-object target is only exercised when running on a single
    process.
    """
    if MPI_RANK == 0:
        string_path = self.get_temporary_file_path('out1.shp')
        object_path = self.get_temporary_file_path('out2.shp')
    else:
        string_path = object_path = None
    string_path = MPI_COMM.bcast(string_path)
    object_path = MPI_COMM.bcast(object_path)

    # Field that will be written to file.
    field = self.get_driver().create_field()

    # Only test open file objects on a single processor.
    fobject = None
    if MPI_SIZE == 1:
        fiona_crs = get_fiona_crs(field)
        fiona_schema = get_fiona_schema(field.geom.geom_type, six.next(field.iter())[1])
        fobject = fiona.open(object_path, mode='w', schema=fiona_schema, crs=fiona_crs,
                             driver='ESRI Shapefile')

    for target in (string_path, fobject):
        # Skip the open file object test during a multi-proc test.
        if target is None and MPI_SIZE > 1:
            continue

        field.write(target, driver=DriverVector)

        if isinstance(target, six.string_types):
            written = string_path
        else:
            written = object_path
            # The collection must be closed before the file is reopened below.
            fobject.close()

        if MPI_RANK == 0:
            with fiona.open(written) as source:
                self.assertEqual(len(source), 51)

        reread = RequestDataset(uri=written).get()
        for member in list(field.values()):
            if isinstance(member, CoordinateReferenceSystem):
                self.assertEqual(member, reread.crs)
            else:
                self.assertNumpyAll(member.get_value(), reread[member.name].get_value())
def test_get_dist_default_distribution(self):
    """Test using default distributions defined by drivers.

    Rank 0 writes a file with an X variable on 'five' and a Y variable on
    'seven'. The distribution taken from the request dataset's driver must
    then report 'seven' as distributed.
    """
    path = None
    with vm.scoped('write', [0]):
        if not vm.is_null:
            path = self.get_temporary_file_path('foo.nc')
            coordinates = [
                Variable('x', np.arange(5), dimensions='five', attrs={'axis': 'X'}),
                Variable('y', np.arange(7) + 10, dimensions='seven', attrs={'axis': 'Y'}),
            ]
            VariableCollection(variables=coordinates).write(path)
    path = MPI_COMM.bcast(path)

    seven = RequestDataset(path).driver.dist.get_dimension('seven')
    self.assertTrue(seven.dist)
def test_system_cf_data_write_parallel(self):
    """Test a distributed round trip of the ``cancm4_tas`` test dataset.

    The 'lat' and 'lon' dimensions are flagged as distributed, the field is
    read and written back out, and rank 0 compares the output with the source
    file while ignoring a fixed set of variables and attributes.
    """
    path_out = self.get_temporary_file_path('foo.nc') if MPI_RANK == 0 else None
    path_out = MPI_COMM.bcast(path_out)

    rd = self.test_data.get_rd('cancm4_tas')
    for axis in ('lat', 'lon'):
        rd.metadata['dimensions'][axis]['dist'] = True

    rd.get().write(path_out, dataset_kwargs={'format': rd.metadata['file_format']})

    if MPI_RANK != 0:
        return

    skipped_attributes = {
        'time_bnds': ['units', 'calendar'],
        'lat_bnds': ['units'],
        'lon_bnds': ['units'],
        'tas': ['grid_mapping'],
    }
    self.assertNcEqual(path_out, rd.uri,
                       ignore_variables=['latitude_longitude'],
                       ignore_attributes=skipped_attributes)
def test_system_parallel_write_ndvariable(self):
    """Test a parallel CSV write with a n-dimensional variable.

    Rank 0 builds a four-dimensional data variable plus its coordinates. The
    collection is scattered with 'y' as the distributed dimension and written
    with ``DriverCSV``. Rank 0 then checks the number of lines in the file.
    """
    dist = OcgDist()
    for dim_name, dim_size in (('time', 3), ('extra', 2), ('x', 4)):
        dist.create_dimension(dim_name, dim_size)
    dist.create_dimension('y', 7, dist=True)
    dist.update_dimension_bounds()

    path = vc = None
    if MPI_RANK == 0:
        path = self.get_temporary_file_path('foo.csv')

        time = TemporalVariable(name='time', value=[1, 2, 3], dtype=float, dimensions='time')
        time.set_extrapolated_bounds('the_time_bounds', 'bounds')

        extra = Variable(name='extra', value=[7, 8], dimensions='extra')

        x = Variable(name='x', value=[9, 10, 11, 12], dimensions='x', dtype=float)
        x.set_extrapolated_bounds('x_bounds', 'bounds')

        # This will have the distributed dimension.
        y = Variable(name='y', value=[13, 14, 15, 16, 17, 18, 19], dimensions='y', dtype=float)
        y.set_extrapolated_bounds('y_bounds', 'bounds')

        data = Variable(name='data', value=np.random.rand(3, 2, 7, 4),
                        dimensions=['time', 'extra', 'y', 'x'])

        vc = VariableCollection(variables=[time, extra, x, y, data])

    path = MPI_COMM.bcast(path)
    vc = variable_collection_scatter(vc, dist)

    iter_kwargs = {'variable': 'data', 'followers': ['time', 'extra', 'y', 'x']}
    with vm.scoped_by_emptyable('write', vc):
        if not vm.is_null:
            vc.write(path, iter_kwargs=iter_kwargs, driver=DriverCSV)

    if MPI_RANK == 0:
        with open(path, 'r') as f:
            line_count = len(f.readlines())
        self.assertEqual(line_count, 169)
def test_system_get_field_from_file(self):
    """Test returning a distributed field from file.

    Rank 0 writes a 5x7 field. Every rank reads it back through a
    ``RequestDataset`` and takes part in writing it out again. The sum of the
    data variable re-read on rank 0 must match the sum of the original field.
    """
    field = self.get_field(nrow=5, ncol=7)

    path = self.get_temporary_file_path('data.nc') if MPI_RANK == 0 else None
    path = MPI_COMM.bcast(path)

    with vm.scoped('write test field', [0]):
        if MPI_RANK == 0:
            field.write(path)
    MPI_COMM.Barrier()

    out_field = RequestDataset(path).get()

    if MPI_SIZE == 8:
        self.assertEqual(vm.size, 8)

    path2 = self.get_temporary_file_path('out_field.nc') if MPI_RANK == 0 else None
    path2 = MPI_COMM.bcast(path2)

    with vm.scoped_by_emptyable('out_field write', out_field):
        if not vm.is_null:
            out_field.write(path2)
    MPI_COMM.Barrier()

    actual = None
    with vm.scoped('get actual', [0]):
        if MPI_RANK == 0:
            reread = RequestDataset(path2).get()
            actual = reread.data_variables[0].get_value().sum()
    actual = MPI_COMM.bcast(actual)

    desired = field.data_variables[0].get_value().sum()
    self.assertAlmostEqual(actual, desired)
def test_get_spatial_subset(self):
    """Run spatial subsets for every target / geometry / operation combination.

    Valid operations must return a ``Field``, which is then written to a
    per-case temporary file. The invalid operation 'foo' must raise
    ``ValueError``. An empty subset is only accepted for the
    ('lambert', 'Germany') and ('rotated_pole', 'Nebraska') pairings.
    """
    case_count = 0
    for ss, k in self:
        for geometry_record in self.get_subset_geometries():
            for operation in ('intersects', 'clip', 'foo'):
                if MPI_RANK == 0:
                    output_path = self.get_temporary_file_path('file-{}.nc'.format(case_count))
                else:
                    output_path = None
                output_path = MPI_COMM.bcast(output_path)

                case_count += 1

                # Work on copies so one case cannot affect the next.
                geometry = deepcopy(geometry_record['geom'])
                subsetter = deepcopy(ss)
                try:
                    ret = subsetter.get_spatial_subset(operation, geometry,
                                                       use_spatial_index=True,
                                                       buffer_value=None,
                                                       buffer_crs=None,
                                                       geom_crs=WGS84())
                except ValueError:
                    # 'foo' is not a valid type of subset operation.
                    if operation != 'foo':
                        raise
                    continue
                except EmptySubsetError:
                    try:
                        self.assertEqual(list(k.target.keys())[0], 'lambert')
                        self.assertEqual(geometry_record['properties']['DESC'], 'Germany')
                    except AssertionError:
                        self.assertEqual(list(k.target.keys())[0], 'rotated_pole')
                        self.assertEqual(geometry_record['properties']['DESC'], 'Nebraska')
                    continue

                self.assertIsInstance(ret, Field)
                with vm.scoped_by_emptyable('write', ret):
                    if not vm.is_null:
                        ret.write(output_path)

    self.assertGreater(case_count, 5)
def test_system_parallel_write(self):
    """Round-trip the template CSV file through a distributed field.

    The first dimension found in the request metadata is flagged as
    distributed, the field is written with ``DriverCSV``, and rank 0 checks
    that the output matches the input file.
    """
    src_path = dst_path = None
    if MPI_RANK == 0:
        src_path = self.get_path_to_template_csv()
        dst_path = self.get_temporary_file_path('foo_out.csv')
    src_path = MPI_COMM.bcast(src_path)
    dst_path = MPI_COMM.bcast(dst_path)

    rd = RequestDataset(src_path)
    first_dimension = list(rd.metadata['dimensions'].values())[0]
    first_dimension['dist'] = True

    field = rd.get_field()
    with vm.scoped_by_emptyable('vc.write', field):
        if not vm.is_null:
            field.write(dst_path, driver=DriverCSV)

    if MPI_RANK == 0:
        self.assertCSVFilesEqual(src_path, dst_path)
def test_get_intersection_state_boundaries(self):
    """Intersect a global grid with the unioned state boundaries.

    The union of all state geometries is broadcast to every rank and used as
    the intersection geometry. Depending on ``with_bounds``, either the
    summed area or the summed masked-cell count is gathered and checked on
    rank 0.
    """
    with fiona.open(self.path_state_boundaries) as source:
        geoms = [shape(record['geometry']) for record in source]

    unioned = GeometryVariable(value=geoms, dimensions='ngeom').get_unioned()
    subset = None if unioned is None else unioned.get_value().flatten()[0]
    subset = MPI_COMM.bcast(subset)

    resolution = 2.0
    keywords = dict(with_bounds=[False])

    for k in self.iter_product_keywords(keywords):
        grid = self.get_gridxy_global(resolution=resolution, with_bounds=k.with_bounds)
        res = grid.get_intersection(subset)

        if res.is_empty:
            self.assertIsInstance(res, GeometryVariable)
        else:
            self.assertTrue(res.get_mask().any())

        if k.with_bounds:
            area = res.area
            # Ranks without an area contribute zero to the global sum.
            local_area = 0.0 if area is None else area.sum()
            areas = MPI_COMM.gather(local_area)
            if MPI_RANK == 0:
                self.assertAlmostEqual(sum(areas), 1096.0819224080542)
        else:
            mask = res.get_mask()
            # Ranks without a mask contribute zero masked cells.
            local_masked = 0 if mask is None else mask.sum()
            counts = MPI_COMM.gather(local_masked)
            if MPI_RANK == 0:
                self.assertEqual(sum(counts), 858)
def test_system_spatial_averaging_from_file(self):
    """Spatially average ``tas`` over the Nebraska polygon read from file.

    The state polygon is located on whichever rank holds it, gathered and
    re-broadcast, and used to subset the first ten time steps of the
    ``cancm4_tas`` grid. The unioned, spatially averaged values are then
    compared with a process-count-specific expected sum.
    """
    rd_nc = self.test_data.get_rd('cancm4_tas')
    field_shp = RequestDataset(self.path_state_boundaries).get()

    dimension_map = field_shp.dimension_map
    self.assertIsNotNone(dimension_map.get_variable(DMK.GEOM))
    self.assertEqual(len(field_shp.dimension_map.get_dimension(DMK.GEOM)), 1)
    self.assertEqual(field_shp.crs, WGS84())

    try:
        index_geom = np.where(field_shp['STATE_NAME'].get_value() == 'Nebraska')[0][0]
    except IndexError:
        # Not found on rank.
        polygon_field = None
    else:
        polygon_field = field_shp.get_field_slice({'geom': index_geom})

    candidates = MPI_COMM.gather(polygon_field)
    if MPI_RANK == 0:
        # Take the first rank's hit; with no hit the gathered list is kept.
        polygon_field = next((c for c in candidates if c is not None), candidates)
    else:
        polygon_field = candidates
    polygon_field = MPI_COMM.bcast(polygon_field)
    polygon_field.unwrap()
    polygon = polygon_field.geom.get_value()[0]

    field_nc = rd_nc.get()
    sub_field_nc = field_nc.get_field_slice({'time': slice(0, 10)})
    self.assertEqual(sub_field_nc['tas']._dimensions, field_nc['tas']._dimensions)
    sub = sub_field_nc.grid.get_intersects(polygon)

    # When split across two processes, there are floating point summing differences.
    desired = {1: 2734.5195, 2: 2740.4014}
    with vm.scoped_by_emptyable('grid intersects', sub):
        if not vm.is_null:
            abstraction_geometry = sub.get_abstraction_geometry()
            sub.parent.add_variable(abstraction_geometry, force=True)
            unioned = abstraction_geometry.get_unioned(spatial_average='tas')
            if unioned is not None:
                tas = unioned.parent['tas']
                self.assertFalse(tas.is_empty)
                self.assertAlmostEqual(tas.get_value().sum(), desired[vm.size], places=4)
def test_system_parallel_write_ndvariable(self):
    """Test a parallel vector GIS write with a n-dimensional variable.

    Rank 0 builds a field with a four-dimensional data variable and an
    abstraction geometry. The field is scattered with 'y' as the distributed
    dimension and written with ``DriverVector``. The geometry counts read
    back on each rank are gathered and summed on rank 0.
    """
    dist = OcgDist()
    for dim_name, dim_size in (('time', 3), ('extra', 2), ('x', 4)):
        dist.create_dimension(dim_name, dim_size)
    dist.create_dimension('y', 7, dist=True)
    dist.update_dimension_bounds()

    path = vc = None
    if MPI_RANK == 0:
        path = self.get_temporary_file_path('foo.shp')

        time = TemporalVariable(name='time', value=[1, 2, 3], dtype=float, dimensions='time')
        time.set_extrapolated_bounds('the_time_bounds', 'bounds')

        extra = Variable(name='extra', value=[7, 8], dimensions='extra')

        x = Variable(name='x', value=[9, 10, 11, 12], dimensions='x', dtype=float)
        x.set_extrapolated_bounds('x_bounds', 'bounds')

        # This will have the distributed dimension.
        y = Variable(name='y', value=[13, 14, 15, 16, 17, 18, 19], dimensions='y', dtype=float)
        y.set_extrapolated_bounds('y_bounds', 'bounds')

        data = Variable(name='data', value=np.random.rand(3, 2, 7, 4),
                        dimensions=['time', 'extra', 'y', 'x'])

        dimension_map = {
            'x': {'variable': 'x', 'bounds': 'x_bounds'},
            'y': {'variable': 'y', 'bounds': 'y_bounds'},
            'time': {'variable': 'time', 'bounds': 'the_time_bounds'},
        }
        vc = Field(variables=[time, extra, x, y, data], dimension_map=dimension_map, is_data='data')
        vc.set_abstraction_geom()

    path = MPI_COMM.bcast(path)
    vc = variable_collection_scatter(vc, dist)

    with vm.scoped_by_emptyable('write', vc):
        if not vm.is_null:
            vc.write(path, driver=DriverVector)
    MPI_COMM.Barrier()

    reread = RequestDataset(path, driver=DriverVector).get()
    sizes = MPI_COMM.gather(reread.geom.shape[0])
    if MPI_RANK == 0:
        self.assertEqual(sum(sizes), 168)
def test_system_spatial_averaging_from_file(self):
    """Spatially average ``tas`` over the Nebraska polygon read from file.

    The state polygon is located on whichever rank holds it, gathered and
    re-broadcast, and used to subset the first ten time steps of the
    ``cancm4_tas`` grid. The unioned, spatially averaged values are then
    compared with a process-count-specific expected sum.
    """
    rd_nc = self.test_data.get_rd('cancm4_tas')
    field_shp = RequestDataset(self.path_state_boundaries).get()

    geom_variable = field_shp.dimension_map.get_variable(DMK.GEOM)
    self.assertIsNotNone(geom_variable)
    geom_dimension = field_shp.dimension_map.get_dimension(DMK.GEOM)
    self.assertEqual(len(geom_dimension), 1)
    self.assertEqual(field_shp.crs, WGS84())

    try:
        index_geom = np.where(field_shp['STATE_NAME'].get_value() == 'Nebraska')[0][0]
    except IndexError:
        # Not found on rank.
        polygon_field = None
    else:
        polygon_field = field_shp.get_field_slice({'geom': index_geom})

    candidates = MPI_COMM.gather(polygon_field)
    if MPI_RANK == 0:
        # Take the first rank's hit; with no hit the gathered list is kept.
        polygon_field = next((c for c in candidates if c is not None), candidates)
    else:
        polygon_field = candidates
    polygon_field = MPI_COMM.bcast(polygon_field)
    polygon_field.unwrap()
    polygon = polygon_field.geom.get_value()[0]

    field_nc = rd_nc.get()
    sub_field_nc = field_nc.get_field_slice({'time': slice(0, 10)})
    self.assertEqual(sub_field_nc['tas']._dimensions, field_nc['tas']._dimensions)
    sub = sub_field_nc.grid.get_intersects(polygon)

    # When split across two processes, there are floating point summing differences.
    desired = {1: 2734.5195, 2: 2740.4014}
    with vm.scoped_by_emptyable('grid intersects', sub):
        if not vm.is_null:
            abstraction_geometry = sub.get_abstraction_geometry()
            sub.parent.add_variable(abstraction_geometry, force=True)
            unioned = abstraction_geometry.get_unioned(spatial_average='tas')
            if unioned is not None:
                tas = unioned.parent['tas']
                self.assertFalse(tas.is_empty)
                self.assertAlmostEqual(tas.get_value().sum(), desired[vm.size], places=4)
def test_system_parallel_write_ndvariable(self):
    """Test a parallel CSV write with a n-dimensional variable.

    Rank 0 builds a four-dimensional data variable plus its coordinates. The
    collection is scattered with 'y' as the distributed dimension and written
    with ``DriverCSV``. Rank 0 then checks the number of lines in the file.
    """
    dist = OcgDist()
    for dim_name, dim_size in (('time', 3), ('extra', 2), ('x', 4)):
        dist.create_dimension(dim_name, dim_size)
    dist.create_dimension('y', 7, dist=True)
    dist.update_dimension_bounds()

    path = vc = None
    if MPI_RANK == 0:
        path = self.get_temporary_file_path('foo.csv')

        time = TemporalVariable(name='time', value=[1, 2, 3], dtype=float, dimensions='time')
        time.set_extrapolated_bounds('the_time_bounds', 'bounds')

        extra = Variable(name='extra', value=[7, 8], dimensions='extra')

        x = Variable(name='x', value=[9, 10, 11, 12], dimensions='x', dtype=float)
        x.set_extrapolated_bounds('x_bounds', 'bounds')

        # This will have the distributed dimension.
        y = Variable(name='y', value=[13, 14, 15, 16, 17, 18, 19], dimensions='y', dtype=float)
        y.set_extrapolated_bounds('y_bounds', 'bounds')

        data = Variable(name='data', value=np.random.rand(3, 2, 7, 4),
                        dimensions=['time', 'extra', 'y', 'x'])

        vc = VariableCollection(variables=[time, extra, x, y, data])

    path = MPI_COMM.bcast(path)
    vc = variable_collection_scatter(vc, dist)

    iter_kwargs = {'variable': 'data', 'followers': ['time', 'extra', 'y', 'x']}
    with vm.scoped_by_emptyable('write', vc):
        if not vm.is_null:
            vc.write(path, iter_kwargs=iter_kwargs, driver=DriverCSV)

    if MPI_RANK == 0:
        with open(path, 'r') as f:
            line_count = len(f.readlines())
        self.assertEqual(line_count, 169)
def test_write_parallel(self):
    """Test writing by selective rank.

    For each rank and each driver, only that rank writes a two-point field
    and reads it back. The test is skipped unless run on 1 or 3 processes.
    """
    if MPI_SIZE not in (1, 3):
        raise SkipTest('MPI_SIZE != 1 or 3')

    drivers = (DriverCSV, DriverVector, DriverNetcdf)
    for base_rank in range(MPI_SIZE):
        for driver in drivers:
            path = None
            if MPI_RANK == 0:
                filename = '{}-{}.{}'.format(driver.key, base_rank, driver.common_extension)
                path = self.get_temporary_file_path(filename)
            path = MPI_COMM.bcast(path)

            with vm.scoped('field write by rank', [base_rank]):
                if not vm.is_null:
                    points = GeometryVariable(value=[Point(1, 2), Point(3, 4)], name='geom',
                                              dimensions='geom')
                    data = Variable(name='data', value=[10, 20], dimensions='geom')
                    field = Field(geom=points)
                    field.add_variable(data, is_data=True)

                    # The target must not be a directory before or after the write.
                    self.assertFalse(os.path.isdir(path))
                    field.write(path, driver=driver)
                    self.assertFalse(os.path.isdir(path))

                    in_field = RequestDataset(path, driver=driver).get()
                    self.assertEqual(in_field['data'].dimensions[0].size, 2)
            MPI_COMM.Barrier()
    MPI_COMM.Barrier()
def test_write_parallel(self):
    """Test writing by selective rank.

    For each rank and each driver, only that rank writes a two-point field
    and reads it back. The test is skipped unless run on 1 or 3 processes.
    """
    if MPI_SIZE not in (1, 3):
        raise SkipTest('MPI_SIZE != 1 or 3')

    drivers = (DriverCSV, DriverVector, DriverNetcdf)
    for base_rank in range(MPI_SIZE):
        for driver in drivers:
            path = None
            if MPI_RANK == 0:
                filename = '{}-{}.{}'.format(driver.key, base_rank, driver.common_extension)
                path = self.get_temporary_file_path(filename)
            path = MPI_COMM.bcast(path)

            with vm.scoped('field write by rank', [base_rank]):
                if not vm.is_null:
                    points = GeometryVariable(value=[Point(1, 2), Point(3, 4)], name='geom',
                                              dimensions='geom')
                    data = Variable(name='data', value=[10, 20], dimensions='geom')
                    field = Field(geom=points)
                    field.add_variable(data, is_data=True)

                    # The target must not be a directory before or after the write.
                    self.assertFalse(os.path.isdir(path))
                    field.write(path, driver=driver)
                    self.assertFalse(os.path.isdir(path))

                    in_field = RequestDataset(path, driver=driver).get()
                    self.assertEqual(in_field['data'].dimensions[0].size, 2)
            MPI_COMM.Barrier()
    MPI_COMM.Barrier()
def test_get_intersects_parallel(self):
    """Exercise ``Grid.get_intersects`` in parallel, in memory and from file.

    Three parts: (1) a geometry far from the grid must raise
    ``EmptySubsetError`` on the live ranks; (2) every combination of the
    bounds / touches keywords must return the expected slice; (3) the same
    subset must work on a grid built from variables sourced from a file.
    """
    version = sys.version_info
    if version.major == 3 and version.minor == 5:
        raise SkipTest('undefined behavior with Python 3.5')

    grid = self.get_gridxy()
    live_ranks = vm.get_live_ranks_from_object(grid)

    # Test with an empty subset.
    far_away = box(1000., 1000., 1100., 1100.)
    with vm.scoped('empty subset', live_ranks):
        if not vm.is_null:
            with self.assertRaises(EmptySubsetError):
                grid.get_intersects(far_away)

    # Test combinations.
    subset_geom = box(101.5, 40.5, 102.5, 42.)
    wide = (slice(0, 3, None), slice(0, 3, None))
    medium = (slice(1, 3, None), slice(1, 2, None))
    narrow = (slice(1, 2, None), slice(1, 2, None))

    keywords = dict(is_vectorized=[True, False], has_bounds=[False, True], use_bounds=[False, True],
                    keep_touches=[True, False])

    for k in self.iter_product_keywords(keywords):
        grid = self.get_gridxy()
        vm_name, _ = vm.create_subcomm_by_emptyable('grid testing', grid, is_current=True)

        if not vm.is_null:
            if k.has_bounds:
                grid.set_extrapolated_bounds('xbounds', 'ybounds', 'bounds')
                self.assertTrue(grid.has_bounds)

            # Cannot use bounds with a point grid abstraction.
            if not (k.use_bounds and grid.abstraction == 'point'):
                grid_sub, slc = grid.get_intersects(subset_geom, keep_touches=k.keep_touches,
                                                    use_bounds=k.use_bounds, return_slice=True)

                if k.has_bounds:
                    self.assertTrue(grid.has_bounds)

                # Test geometries are filled appropriately after allocation.
                if not grid_sub.is_empty:
                    for element in grid_sub.get_abstraction_geometry().get_value().flat:
                        self.assertIsInstance(element, BaseGeometry)

                self.assertIsInstance(grid_sub, Grid)

                bounds_in_play = k.has_bounds and k.use_bounds
                if k.keep_touches:
                    desired = wide if bounds_in_play else medium
                else:
                    desired = medium if bounds_in_play else narrow

                if not grid.is_empty:
                    self.assertEqual(grid.has_bounds, k.has_bounds)
                    self.assertTrue(grid.is_vectorized)

                self.assertEqual(slc, desired)

        # Single release point for the sub-communicator created for this iteration.
        vm.free_subcomm(name=vm_name)
        vm.set_comm()

    # Test against a file.
    subset_geom = box(101.5, 40.5, 102.5, 42.)

    path_grid = self.get_temporary_file_path('grid.nc') if MPI_RANK == 0 else None
    path_grid = MPI_COMM.bcast(path_grid)

    grid_to_write = self.get_gridxy()
    with vm.scoped_by_emptyable('write', grid_to_write):
        if not vm.is_null:
            Field(grid=grid_to_write).write(path_grid, driver=DriverNetcdfCF)
    MPI_COMM.Barrier()

    rd = RequestDataset(uri=path_grid)
    x = SourcedVariable(name='x', request_dataset=rd)
    self.assertIsNone(x._value)
    y = SourcedVariable(name='y', request_dataset=rd)
    # Neither variable should have a value set after creating the second one.
    self.assertIsNone(x._value)
    self.assertIsNone(y._value)

    grid = Grid(x, y)
    for coordinate_name in (grid._y_name, grid._x_name):
        self.assertIsNone(grid.parent[coordinate_name]._value)
    self.assertTrue(grid.is_vectorized)

    with vm.scoped_by_emptyable('intersects', grid):
        if not vm.is_null:
            sub, slc = grid.get_intersects(subset_geom, return_slice=True)
            self.assertEqual(slc, medium)
            self.assertIsInstance(sub, Grid)

    # The file may be deleted before other ranks open.
    MPI_COMM.Barrier()
def test_system_spatial_averaging_through_operations_state_boundaries(self):
    """Spatially average an exact field over the state boundaries in parallel.

    Runs only on 8 processes. For every combination of spatial operation,
    aggregation, wrapping and output format, rank 0 writes a global exact
    field, all ranks run ``OcgOperations`` against the 'state_boundaries'
    geometries, and the outputs are checked:

    * 'shp' + aggregate + 'clip': the area of UGID 8 is preserved.
    * 'shp' / 'csv-shp': the overview geometry shapefiles exist.
    * OCGIS output: 51 unique geometry keys are present across ranks and,
      when aggregating, the averaged values stay within the error limits
      relative to the exact value at each state's centroid.

    Changes from the previous version:

    * ``vm.scoped`` is now called with a name followed by the ranks, as at
      every other call site in this file. NOTE(review): confirm against the
      ``OcgVM.scoped`` signature.
    * The child keys are converted to a list before being wrapped in a NumPy
      array. On Python 3 ``dict.keys()`` is a view, which ``np.array`` turns
      into a 0-d object array rather than an array of the keys.
    """
    if MPI_SIZE != 8:
        raise SkipTest('MPI_SIZE != 8')

    ntime = 3

    # Get the exact field value for the state's representative center.
    # These reference values only exist on rank 0 and are only used there.
    with vm.scoped('exact state values', [0]):
        if MPI_RANK == 0:
            states = RequestDataset(self.path_state_boundaries, driver='vector').get()
            states.update_crs(env.DEFAULT_COORDSYS)
            fill = np.zeros((states.geom.shape[0], 2))
            for idx, geom in enumerate(states.geom.get_value().flat):
                centroid = geom.centroid
                fill[idx, :] = centroid.x, centroid.y
            exact_states = create_exact_field_value(fill[:, 0], fill[:, 1])
            state_ugid = states['UGID'].get_value()
            area = states.geom.area

    keywords = {
        'spatial_operation': ['clip', 'intersects'],
        'aggregate': [True, False],
        'wrapped': [True, False],
        'output_format': [OutputFormatName.OCGIS, 'csv', 'csv-shp', 'shp'],
    }

    for ctr, k in enumerate(self.iter_product_keywords(keywords)):
        path = None
        with vm.scoped('write exact field', [0]):
            if vm.is_root:
                grid = create_gridxy_global(resolution=1.0, dist=False, wrapped=k.wrapped)
                field = create_exact_field(grid, 'foo', ntime=ntime)
                path = self.get_temporary_file_path('foo.nc')
                field.write(path)
        path = MPI_COMM.bcast(path)

        rd = RequestDataset(path)
        ops = OcgOperations(dataset=rd, geom='state_boundaries', spatial_operation=k.spatial_operation,
                            aggregate=k.aggregate, output_format=k.output_format, prefix=str(ctr))
        ret = ops.execute()

        # Test area is preserved for a problem element during union. The union's geometry was not fully
        # represented in the output.
        if k.output_format == 'shp' and k.aggregate and k.spatial_operation == 'clip':
            with vm.scoped('check union area', [0]):
                if vm.is_root:
                    inn = RequestDataset(ret).get()
                    inn_ugid_idx = np.where(inn['UGID'].get_value() == 8)[0][0]
                    ugid_idx = np.where(state_ugid == 8)[0][0]
                    self.assertAlmostEqual(inn.geom.get_value()[inn_ugid_idx].area, area[ugid_idx],
                                           places=2)

        # Test the overview geometry shapefile is written.
        if k.output_format == 'shp':
            directory = os.path.split(ret)[0]
            contents = os.listdir(directory)
            self.assertTrue(any('_ugid.shp' in c for c in contents))
        elif k.output_format == 'csv-shp':
            directory = os.path.join(os.path.split(ret)[0], 'shp')
            contents = os.listdir(directory)
            self.assertTrue(any('_ugid.shp' in c for c in contents))
            if not k.aggregate:
                self.assertTrue(any('_gid.shp' in c for c in contents))

        if k.output_format == OutputFormatName.OCGIS:
            # Materialize the keys first: a dict view is not a sequence for NumPy.
            geom_keys = list(ret.children.keys())
            all_geom_keys = vm.gather(np.array(geom_keys))
            if vm.is_root:
                all_geom_keys = hgather(all_geom_keys)
                self.assertEqual(len(np.unique(all_geom_keys)), 51)

            if k.aggregate:
                actual = Dict()
                for field, container in ret.iter_fields(yield_container=True):
                    if not field.is_empty:
                        ugid = container.geom.ugid.get_value()[0]
                        actual[ugid]['actual'] = field.data_variables[0].get_value()
                        actual[ugid]['area'] = container.geom.area[0]

                actual = vm.gather(actual)

                if vm.is_root:
                    actual = dgather(actual)

                    ares = []
                    actual_areas = []
                    for ugid_key, v in actual.items():
                        ugid_idx = np.where(state_ugid == ugid_key)[0][0]
                        desired = exact_states[ugid_idx]
                        actual_areas.append(v['area'])
                        for tidx in range(ntime):
                            # Absolute error against the exact value for this time index.
                            are = np.abs((desired + ((tidx + 1) * 10)) - v['actual'][tidx, 0])
                            ares.append(are)

                    if k.spatial_operation == 'clip':
                        diff = np.abs(np.array(area) - np.array(actual_areas))
                        self.assertLess(np.max(diff), 1e-6)
                        self.assertLess(np.mean(diff), 1e-6)

                    # Test relative errors.
                    self.assertLess(np.max(ares), 0.031)
                    self.assertLess(np.mean(ares), 0.009)
def test_get_intersects_state_boundaries(self):
    """Subset a global grid by the unioned state boundaries, with and without bounds.

    For each ``with_bounds`` setting the grid is subset on the ranks that
    hold data. The subset mask is gathered and its shape, the returned slice
    and the unmasked-cell count are compared with fixed expected values. The
    subset is then written to file. The virtual machine is reset at the end
    of every iteration because the sub-communicator names are reused.
    """
    with fiona.open(self.path_state_boundaries) as source:
        geoms = [shape(record['geometry']) for record in source]

    unioned = GeometryVariable(value=geoms, dimensions='ngeom').get_unioned()
    subset = unioned.get_value().flatten()[0] if unioned is not None else None
    subset = MPI_COMM.bcast(subset)

    resolution = 1.0

    for with_bounds in (False, True):
        grid = self.get_gridxy_global(resolution=resolution, with_bounds=with_bounds)

        vm.create_subcomm_by_emptyable('global grid', grid, is_current=True)
        if not vm.is_null:
            grid_sub, slc = grid.get_intersects(subset, return_slice=True)

            vm.create_subcomm_by_emptyable('grid subset', grid_sub, is_current=True)
            if not vm.is_null:
                local_mask = Variable('mask_after_subset', grid_sub.get_mask(),
                                      dimensions=grid_sub.dimensions)
                mask = variable_gather(local_mask)

                mask_sum = mask_shape = None
                if vm.rank == 0:
                    # Count the cells that are not masked.
                    mask_sum = np.invert(mask.get_value()).sum()
                    mask_shape = mask.shape
                mask_sum = vm.bcast(mask_sum)
                mask_shape = vm.bcast(mask_shape)

                if with_bounds:
                    expected_shape = (54, 113)
                    expected_slc = (slice(108, 162, None), slice(1, 114, None))
                    expected_sum = 1358
                else:
                    if MPI_SIZE == 2:
                        grid_bounds_global = [dim.bounds_global for dim in grid_sub.dimensions]
                        self.assertEqual(grid_bounds_global, [(0, 52), (0, 105)])
                    expected_shape = (52, 105)
                    expected_slc = (slice(109, 161, None), slice(8, 113, None))
                    expected_sum = 1087

                self.assertEqual(mask_shape, expected_shape)
                self.assertEqual(slc, expected_slc)
                self.assertEqual(mask_sum, expected_sum)

                path = self.get_temporary_file_path('foo.nc') if vm.rank == 0 else None
                path = vm.bcast(path)
                Field(grid=grid_sub).write(path)

        # Reset the virtual machine before the next iteration.
        vm.finalize()
        vm.__init__()
        MPI_COMM.Barrier()