def test_write_variable_collection_object_arrays(self):
    """Compare a file written from a scattered object-array variable with one written from a single rank."""
    # Defaults for ranks that fall outside the single-rank write scope.
    source = path_actual = path_desired = None
    with vm.scoped('write', [0]):
        if not vm.is_null:
            path_actual = self.get_temporary_file_path('in.nc')
            path_desired = self.get_temporary_file_path('out.nc')

            # Rows deliberately have different lengths.
            ragged = [[1, 3, 5], [7, 9], [11]]
            source = Variable(name='objects', value=ragged, fill_value=4,
                              dtype=ObjectType(int), dimensions='values')
            source.write(path_desired)

    # Every rank needs both paths for the parallel write and the comparison.
    path_actual = MPI_COMM.bcast(path_actual)
    path_desired = MPI_COMM.bcast(path_desired)

    layout = OcgDist()
    layout.create_dimension('values', 3, dist=True)
    layout.update_dimension_bounds()

    collection = VariableCollection(variables=[variable_scatter(source, layout)])

    with vm.scoped_by_emptyable('write', collection):
        if not vm.is_null:
            collection.write(path_actual)

    if MPI_RANK == 0:
        self.assertNcEqual(path_actual, path_desired)
def test_write_variable_collection_object_arrays(self):
    """Compare a file written from a scattered object-array variable with one written from a single rank."""
    # Defaults for ranks that fall outside the single-rank write scope.
    source = path_actual = path_desired = None
    with vm.scoped('write', [0]):
        if not vm.is_null:
            path_actual = self.get_temporary_file_path('in.nc')
            path_desired = self.get_temporary_file_path('out.nc')

            # Rows deliberately have different lengths.
            ragged = [[1, 3, 5], [7, 9], [11]]
            source = Variable(name='objects', value=ragged, fill_value=4,
                              dtype=ObjectType(int), dimensions='values')
            source.write(path_desired)

    # Every rank needs both paths for the parallel write and the comparison.
    path_actual = MPI_COMM.bcast(path_actual)
    path_desired = MPI_COMM.bcast(path_desired)

    layout = OcgDist()
    layout.create_dimension('values', 3, dist=True)
    layout.update_dimension_bounds()

    collection = VariableCollection(variables=[variable_scatter(source, layout)])

    with vm.scoped_by_emptyable('write', collection):
        if not vm.is_null:
            collection.write(path_actual)

    if MPI_RANK == 0:
        self.assertNcEqual(path_actual, path_desired)
def test_system_with_distributed_dimensions_from_file_shapefile(self):
    """Read a shapefile variable across ranks and compare it with a read scoped to rank 0."""
    shp_path = self.path_state_boundaries

    # Reference values come from a read restricted to the first rank.
    with vm.scoped('desired data write', [0]):
        if not vm.is_null:
            reference_var = SourcedVariable(
                name='STATE_NAME',
                request_dataset=RequestDataset(uri=shp_path, driver=DriverVector))
            value_desired = reference_var.get_value().tolist()
            self.assertEqual(len(value_desired), 51)

    dist_rd = RequestDataset(uri=shp_path, driver=DriverVector)
    dist_var = SourcedVariable(name='STATE_NAME', request_dataset=dist_rd)
    group = dist_rd.driver.dist.get_group()
    self.assertEqual(len(group['dimensions']), 1)

    leading_dim = dist_var.dimensions[0]
    self.assertTrue(leading_dim.dist)
    self.assertIsNotNone(dist_var.get_value())
    if MPI_SIZE > 1:
        self.assertLessEqual(dist_var.shape[0], 26)

    gathered = MPI_COMM.gather(dist_var.get_value())
    if MPI_RANK != 0:
        # Only the root rank receives the gathered pieces.
        self.assertIsNone(gathered)
    else:
        merged = hgather(gathered)
        self.assertEqual(merged.tolist(), value_desired)
def test_get_dist_default_distribution(self):
    """Check that the driver's default distribution marks the 'seven' dimension as distributed."""
    path = None
    with vm.scoped('write', [0]):
        if not vm.is_null:
            path = self.get_temporary_file_path('foo.nc')
            x_var = Variable('x', np.arange(5), dimensions='five', attrs={'axis': 'X'})
            y_var = Variable('y', np.arange(7) + 10, dimensions='seven', attrs={'axis': 'Y'})
            VariableCollection(variables=[x_var, y_var]).write(path)
    path = MPI_COMM.bcast(path)

    driver_dist = RequestDataset(path).driver.dist
    self.assertTrue(driver_dist.get_dimension('seven').dist)
def test_variable_gather(self):
    """Scatter a two-dimensional variable, gather it again and compare with the original on rank 0."""
    layout = OcgDist()
    dim_three = layout.create_dimension('three', 3, src_idx=np.arange(3) * 10)
    dim_four = layout.create_dimension('four', 4, src_idx=np.arange(4, dtype=np.int32), dist=True)
    layout.create_variable('four', dimensions=[dim_three, dim_four])
    layout.update_dimension_bounds()

    mask = None
    if MPI_RANK == 0:
        # Fixed seed keeps the reference values reproducible.
        np.random.seed(1)
        mask_value = np.random.random(12).reshape(3, 4)
        mask = Variable('mask', value=mask_value, dimensions=['three', 'four'])

    mask = variable_scatter(mask, layout)
    with vm.scoped('mask gather', layout.get_empty_ranks(inverse=True)):
        mask_gather = None if vm.is_null else variable_gather(mask)

    if MPI_RANK != 0:
        self.assertIsNone(mask_gather)
    else:
        self.assertNumpyAll(mask_gather.get_value(), mask_value)
        first_dim, second_dim = mask_gather.dimensions[0], mask_gather.dimensions[1]
        self.assertNumpyAll(first_dim._src_idx, np.arange(3) * 10)
        self.assertNumpyAll(second_dim._src_idx, np.arange(4, dtype=DataType.DIMENSION_SRC_INDEX))
        # No gathered dimension should still be flagged as distributed.
        for gathered_dim in mask_gather.dimensions:
            self.assertFalse(gathered_dim.dist)
def test_system_spatial_averaging_through_operations(self):
    """Run an aggregating operation on a written field and compare with the mean of its data."""
    data_name = 'data'

    data_value = path = None
    with vm.scoped('write', [0]):
        if not vm.is_null:
            x = Variable('x', range(5), 'x', float)
            y = Variable('y', range(7), 'y', float)
            grid = Grid(x, y)
            raw = np.arange(x.size * y.size).reshape(grid.shape)
            data = Variable(data_name, raw, grid.dimensions, float)
            # Use the values as stored on the variable for the comparison.
            data_value = data.get_value()
            path = self.get_temporary_file_path('data.nc')
            Field(grid=grid, is_data=data).write(path)
    data_value = MPI_COMM.bcast(data_value)
    path = MPI_COMM.bcast(path)

    request = RequestDataset(path, variable=data_name)
    result = OcgOperations(dataset=request, aggregate=True).execute()

    if result is None:
        # A rank without a result must not be the root.
        self.assertNotEqual(vm.rank, vm.root)
        return

    out_field = result.get_element()
    if MPI_RANK == 0:
        expected = data_value.mean()
        observed = out_field.data_variables[0].get_value()[0]
        self.assertEqual(observed, expected)
def test_write_variable_fill_value_is_maintained(self):
    """Write a distributed variable with a fill value and check the mask and fill value read back."""
    if vm.size != 4:
        raise SkipTest('vm.size != 4')

    layout = OcgDist()
    dist_dim = layout.create_dimension('dim', 8, dist=True)
    layout.update_dimension_bounds()

    var = Variable(name='var', dimensions=dist_dim, fill_value=2.0)
    var.v()[0] = 1
    var.v()[1] = 2
    # Build the mask from the values.
    var.get_mask(create=True, check_value=True)

    path = self.get_temporary_file_path('foo.nc') if vm.rank == 0 else None
    path = vm.bcast(path)

    var.parent.write(path)

    # For debugging, self.ncdump(path, header_only=False) can be called on rank 0 here.

    with vm.scoped('read test', [0]):
        if not vm.is_null:
            read_back = RequestDataset(path).create_field()['var']
            self.assertEqual(read_back.get_mask().sum(), 4)
            self.assertEqual(read_back.fill_value, 2.0)
def test_system_with_distributed_dimensions_from_file_shapefile(self):
    """Read a shapefile variable across ranks and compare it with a read scoped to rank 0."""
    shp_path = self.path_state_boundaries

    # Reference values come from a read restricted to the first rank.
    with vm.scoped('desired data write', [0]):
        if not vm.is_null:
            reference_var = SourcedVariable(
                name='STATE_NAME',
                request_dataset=RequestDataset(uri=shp_path, driver=DriverVector))
            value_desired = reference_var.get_value().tolist()
            self.assertEqual(len(value_desired), 51)

    dist_rd = RequestDataset(uri=shp_path, driver=DriverVector)
    dist_var = SourcedVariable(name='STATE_NAME', request_dataset=dist_rd)
    group = dist_rd.driver.dist.get_group()
    self.assertEqual(len(group['dimensions']), 1)

    leading_dim = dist_var.dimensions[0]
    self.assertTrue(leading_dim.dist)
    self.assertIsNotNone(dist_var.get_value())
    if MPI_SIZE > 1:
        self.assertLessEqual(dist_var.shape[0], 26)

    gathered = MPI_COMM.gather(dist_var.get_value())
    if MPI_RANK != 0:
        # Only the root rank receives the gathered pieces.
        self.assertIsNone(gathered)
    else:
        merged = hgather(gathered)
        self.assertEqual(merged.tolist(), value_desired)
def test_system_get_field_from_file(self):
    """Write a field, read it back through a request dataset, rewrite it and compare data sums."""
    field = self.get_field(nrow=5, ncol=7)

    path = self.get_temporary_file_path('data.nc') if MPI_RANK == 0 else None
    path = MPI_COMM.bcast(path)

    with vm.scoped('write test field', [0]):
        if MPI_RANK == 0:
            field.write(path)

    # Wait for the write before any rank reads the file.
    MPI_COMM.Barrier()

    out_field = RequestDataset(path).get()

    if MPI_SIZE == 8:
        self.assertEqual(vm.size, 8)

    path2 = self.get_temporary_file_path('out_field.nc') if MPI_RANK == 0 else None
    path2 = MPI_COMM.bcast(path2)

    with vm.scoped_by_emptyable('out_field write', out_field):
        if not vm.is_null:
            out_field.write(path2)

    MPI_COMM.Barrier()

    actual = None
    with vm.scoped('get actual', [0]):
        if MPI_RANK == 0:
            reread = RequestDataset(path2).get()
            actual = reread.data_variables[0].get_value().sum()

    actual = MPI_COMM.bcast(actual)

    desired = field.data_variables[0].get_value().sum()
    self.assertAlmostEqual(actual, desired)
def test_system_get_field_from_file(self):
    """Write a field, read it back through a request dataset, rewrite it and compare data sums."""
    field = self.get_field(nrow=5, ncol=7)

    path = self.get_temporary_file_path('data.nc') if MPI_RANK == 0 else None
    path = MPI_COMM.bcast(path)

    with vm.scoped('write test field', [0]):
        if MPI_RANK == 0:
            field.write(path)

    # Wait for the write before any rank reads the file.
    MPI_COMM.Barrier()

    out_field = RequestDataset(path).get()

    if MPI_SIZE == 8:
        self.assertEqual(vm.size, 8)

    path2 = self.get_temporary_file_path('out_field.nc') if MPI_RANK == 0 else None
    path2 = MPI_COMM.bcast(path2)

    with vm.scoped_by_emptyable('out_field write', out_field):
        if not vm.is_null:
            out_field.write(path2)

    MPI_COMM.Barrier()

    actual = None
    with vm.scoped('get actual', [0]):
        if MPI_RANK == 0:
            reread = RequestDataset(path2).get()
            actual = reread.data_variables[0].get_value().sum()

    actual = MPI_COMM.bcast(actual)

    desired = field.data_variables[0].get_value().sum()
    self.assertAlmostEqual(actual, desired)
def test_comm_world(self):
    """Check that ``vm.comm_world`` keeps its size while ``vm.size`` follows the active scope."""
    world_size = 2
    if MPI_SIZE != world_size:
        raise SkipTest('MPI_SIZE != 2')

    self.assertEqual(vm.size, world_size)
    self.assertEqual(vm.comm_world.Get_size(), world_size)

    with vm.scoped('comm world test', [1]):
        if not vm.is_null:
            # Inside the one-rank scope only vm.size changes.
            self.assertEqual(vm.size, 1)
            self.assertEqual(vm.comm_world.Get_size(), world_size)
def test_comm_world(self):
    """Check that ``vm.comm_world`` keeps its size while ``vm.size`` follows the active scope."""
    world_size = 2
    if MPI_SIZE != world_size:
        raise SkipTest('MPI_SIZE != 2')

    self.assertEqual(vm.size, world_size)
    self.assertEqual(vm.comm_world.Get_size(), world_size)

    with vm.scoped('comm world test', [1]):
        if not vm.is_null:
            # Inside the one-rank scope only vm.size changes.
            self.assertEqual(vm.size, 1)
            self.assertEqual(vm.comm_world.Get_size(), world_size)
def test_system_grid_chunking(self):
    """Chunk an unstructured grid with weight generation, then check per-chunk and merged weights."""
    if vm.size != 4:
        raise SkipTest('vm.size != 4')

    from ocgis.spatial.grid_chunker import GridChunker

    grid_path = self.path_esmf_unstruct
    rd_dst = RequestDataset(uri=grid_path, driver=DriverESMFUnstruct, crs=Spherical(),
                            grid_abstraction='point', grid_is_isomorphic=True)
    rd_src = deepcopy(rd_dst)
    resolution = 0.28125

    chunk_wd = os.path.join(self.current_dir_output, 'chunks')
    if vm.rank == 0:
        os.mkdir(chunk_wd)
    # The directory must exist before any rank writes chunks.
    vm.barrier()

    chunker = GridChunker(rd_src, rd_dst, nchunks_dst=[8], src_grid_resolution=resolution,
                          dst_grid_resolution=resolution, optimized_bbox_subset=True,
                          paths={'wd': chunk_wd}, genweights=True)
    chunker.write_chunks()

    # Spread the eight chunk counters across ranks through a distributed dimension.
    layout = OcgDist()
    counter_dim = Dimension(name='ctr', size=8, dist=True)
    layout.add_dimension(counter_dim)
    layout.update_dimension_bounds()

    first = counter_dim.bounds_local[0]
    last = counter_dim.bounds_local[1]
    # File names are numbered from one, local bounds from zero.
    for chunk_id in range(first + 1, last + 1):
        src_file = os.path.join(chunk_wd, 'split_src_{}.nc'.format(chunk_id))
        dst_file = os.path.join(chunk_wd, 'split_dst_{}.nc'.format(chunk_id))
        src_chunk = Field.read(src_file, driver=DriverESMFUnstruct)
        dst_chunk = Field.read(dst_file, driver=DriverESMFUnstruct)
        self.assertGreater(src_chunk.grid.shape[0], dst_chunk.grid.shape[0])

        weight_file = os.path.join(chunk_wd, 'esmf_weights_{}.nc'.format(chunk_id))
        weights = Field.read(weight_file)['S'].v()
        self.assertAlmostEqual(weights.min(), 1.0)
        self.assertAlmostEqual(weights.max(), 1.0)

    with vm.scoped('merge weights', [0]):
        if not vm.is_null:
            merged_path = self.get_temporary_file_path('merged_weights.nc')
            chunker.create_merged_weight_file(merged_path, strict=False)
            merged = Field.read(merged_path)['S'].v()
            self.assertAlmostEqual(merged.min(), 1.0)
            self.assertAlmostEqual(merged.max(), 1.0)
def test_scoped(self):
    """Check that nested ``scoped`` contexts change ``vm.size`` and are undone on exit.

    :raises SkipTest: When not running on eight MPI ranks.
    """
    if MPI_SIZE != 8:
        raise SkipTest('MPI_SIZE != 8')

    vm = OcgVM()
    # Finalize in a ``finally`` so a failing assertion cannot skip the call.
    try:
        self.assertEqual(vm.size, 8)
        with vm.scoped('test', [2, 3, 4]):
            if not vm.is_null:
                self.assertEqual(vm.size, 3)
                self.assertEqual(vm.ranks, range(3))
                with vm.scoped('nested', [1]):
                    if not vm.is_null:
                        self.assertEqual(vm.size, 1)
                        self.assertEqual(len(vm._subcomms), 2)
        # Leaving the scopes restores the full size and leaves no subcommunicators.
        self.assertEqual(vm.size, 8)
        self.assertEqual(len(vm._subcomms), 0)
    finally:
        vm.finalize()

    # A fresh instance created after finalization reports the full size again.
    vm = OcgVM()
    try:
        self.assertEqual(vm.size, 8)
    finally:
        vm.finalize()
def test_scoped(self):
    """Check that nested ``scoped`` contexts change ``vm.size`` and are undone on exit.

    :raises SkipTest: When not running on eight MPI ranks.
    """
    if MPI_SIZE != 8:
        raise SkipTest('MPI_SIZE != 8')

    vm = OcgVM()
    # Finalize in a ``finally`` so a failing assertion cannot skip the call.
    try:
        self.assertEqual(vm.size, 8)
        with vm.scoped('test', [2, 3, 4]):
            if not vm.is_null:
                self.assertEqual(vm.size, 3)
                self.assertEqual(vm.ranks, range(3))
                with vm.scoped('nested', [1]):
                    if not vm.is_null:
                        self.assertEqual(vm.size, 1)
                        self.assertEqual(len(vm._subcomms), 2)
        # Leaving the scopes restores the full size and leaves no subcommunicators.
        self.assertEqual(vm.size, 8)
        self.assertEqual(len(vm._subcomms), 0)
    finally:
        vm.finalize()

    # A fresh instance created after finalization reports the full size again.
    vm = OcgVM()
    try:
        self.assertEqual(vm.size, 8)
    finally:
        vm.finalize()
def test_system_grid_chunking(self):
    """Chunk an unstructured grid with weight generation, then check per-chunk and merged weights."""
    if vm.size != 4:
        raise SkipTest('vm.size != 4')

    from ocgis.spatial.grid_chunker import GridChunker

    grid_path = self.path_esmf_unstruct
    rd_dst = RequestDataset(uri=grid_path, driver=DriverESMFUnstruct, crs=Spherical(),
                            grid_abstraction='point', grid_is_isomorphic=True)
    rd_src = deepcopy(rd_dst)
    resolution = 0.28125

    chunk_wd = os.path.join(self.current_dir_output, 'chunks')
    if vm.rank == 0:
        os.mkdir(chunk_wd)
    # The directory must exist before any rank writes chunks.
    vm.barrier()

    chunker = GridChunker(rd_src, rd_dst, nchunks_dst=[8], src_grid_resolution=resolution,
                          dst_grid_resolution=resolution, optimized_bbox_subset=True,
                          paths={'wd': chunk_wd}, genweights=True)
    chunker.write_chunks()

    # Spread the eight chunk counters across ranks through a distributed dimension.
    layout = OcgDist()
    counter_dim = Dimension(name='ctr', size=8, dist=True)
    layout.add_dimension(counter_dim)
    layout.update_dimension_bounds()

    first = counter_dim.bounds_local[0]
    last = counter_dim.bounds_local[1]
    # File names are numbered from one, local bounds from zero.
    for chunk_id in range(first + 1, last + 1):
        src_file = os.path.join(chunk_wd, 'split_src_{}.nc'.format(chunk_id))
        dst_file = os.path.join(chunk_wd, 'split_dst_{}.nc'.format(chunk_id))
        src_chunk = Field.read(src_file, driver=DriverESMFUnstruct)
        dst_chunk = Field.read(dst_file, driver=DriverESMFUnstruct)
        # The source chunk is larger than its destination chunk, by at most 150.
        self.assertLessEqual(src_chunk.grid.shape[0] - dst_chunk.grid.shape[0], 150)
        self.assertGreater(src_chunk.grid.shape[0], dst_chunk.grid.shape[0])

        weight_file = os.path.join(chunk_wd, 'esmf_weights_{}.nc'.format(chunk_id))
        weights = Field.read(weight_file)['S'].v()
        self.assertAlmostEqual(weights.min(), 1.0)
        self.assertAlmostEqual(weights.max(), 1.0)

    with vm.scoped('merge weights', [0]):
        if not vm.is_null:
            merged_path = self.get_temporary_file_path('merged_weights.nc')
            chunker.create_merged_weight_file(merged_path, strict=False)
            merged = Field.read(merged_path)['S'].v()
            self.assertAlmostEqual(merged.min(), 1.0)
            self.assertAlmostEqual(merged.max(), 1.0)
def create_rank_valued_netcdf(self):
    """Write a netCDF file whose values differ per block of ten elements and return its path.

    The file is written inside a scope limited to rank 0. Block ``i`` holds the value ``1 + 10 * (i + 1)``, with one
    block per global rank.

    :returns: The path as returned by ``vm.bcast``.
    """
    block_len = 10
    nranks = vm.size_global
    path = None
    with vm.scoped('write rank netcdf', [0]):
        if not vm.is_null:
            path = self.get_temporary_file_path('dist_desired.nc')
            dim = Dimension('dist_dim', block_len * nranks)
            var = Variable(name='data', dimensions=dim, attrs={'hi': 5})
            for block in range(nranks):
                fill = np.ones(block_len) + (10 * (block + 1))
                lower = block_len * block
                upper = lower + block_len
                var.get_value()[lower:upper] = fill
            var.parent.attrs = {'hi_dataset_level': 'whee'}
            var.write(path)
    return vm.bcast(path)
def test_get_nonempty_ranks(self):
    """Check the ranks reported as non-empty, with and without an empty dimension on rank 0."""
    from ocgis.variable.dimension import Dimension

    _comm, _rank, size = get_standard_comm_state()
    if size not in (1, 3):
        raise SkipTest('MPI_SIZE != 1 or 3')

    # A non-empty dimension on every rank: all ranks are live.
    live_ranks = get_nonempty_ranks(Dimension('a'), vm)
    if MPI_RANK == 0:
        self.assertEqual(live_ranks, tuple(range(size)))

    if MPI_SIZE != 3:
        return

    # Rank 0 holds an empty dimension, so only ranks 1 and 2 remain.
    per_rank = {0: Dimension('a', is_empty=True, dist=True),
                1: Dimension('a'),
                2: Dimension('a')}
    with vm.scoped('ner', vm.ranks):
        live_ranks = get_nonempty_ranks(per_rank[MPI_RANK], vm)
    self.assertEqual(live_ranks, (1, 2))
def test_system_raise_exception_subcommunicator(self):
    """Catch an exception inside a one-rank scope, share it across ranks and re-raise it."""
    if vm.size != 4:
        raise SkipTest('vm.size != 4')

    failing_call = Mock(side_effect=IndexError('oops'))
    with self.assertRaises(IndexError):
        caught = None
        with vm.scoped('the sub which will raise', [2]):
            if not vm.is_null:
                try:
                    failing_call()
                except IndexError as exc:
                    caught = exc
        # Gather each rank's outcome, broadcast the list, then raise the first exception found.
        collected = vm.gather(caught)
        collected = vm.bcast(collected)
        for error in collected:
            if error is not None:
                raise error
def test_system_raise_exception_subcommunicator(self):
    """Catch an exception inside a one-rank scope, share it across ranks and re-raise it."""
    if vm.size != 4:
        raise SkipTest('vm.size != 4')

    failing_call = Mock(side_effect=IndexError('oops'))
    with self.assertRaises(IndexError):
        caught = None
        with vm.scoped('the sub which will raise', [2]):
            if not vm.is_null:
                try:
                    failing_call()
                except IndexError as exc:
                    caught = exc
        # Gather each rank's outcome, broadcast the list, then raise the first exception found.
        collected = vm.gather(caught)
        collected = vm.bcast(collected)
        for error in collected:
            if error is not None:
                raise error
def test_get_dist_default_distribution(self):
    """Check that the driver's default distribution marks the 'seven' dimension as distributed."""
    path = None
    with vm.scoped('write', [0]):
        if not vm.is_null:
            path = self.get_temporary_file_path('foo.nc')
            x_var = Variable('x', np.arange(5), dimensions='five', attrs={'axis': 'X'})
            y_var = Variable('y', np.arange(7) + 10, dimensions='seven', attrs={'axis': 'Y'})
            VariableCollection(variables=[x_var, y_var]).write(path)
    path = MPI_COMM.bcast(path)

    driver_dist = RequestDataset(path).driver.dist
    self.assertTrue(driver_dist.get_dimension('seven').dist)
def test_write_parallel(self):
    """Write a small field from each rank in turn with each driver and read it back on that rank."""
    if MPI_SIZE not in (1, 3):
        raise SkipTest('MPI_SIZE != 1 or 3')

    drivers = (DriverCSV, DriverVector, DriverNetcdf)
    for base_rank in range(MPI_SIZE):
        for driver in drivers:
            path = None
            if MPI_RANK == 0:
                filename = '{}-{}.{}'.format(driver.key, base_rank, driver.common_extension)
                path = self.get_temporary_file_path(filename)
            path = MPI_COMM.bcast(path)

            with vm.scoped('field write by rank', [base_rank]):
                if not vm.is_null:
                    points = GeometryVariable(value=[Point(1, 2), Point(3, 4)], name='geom',
                                              dimensions='geom')
                    payload = Variable(name='data', value=[10, 20], dimensions='geom')
                    field = Field(geom=points)
                    field.add_variable(payload, is_data=True)

                    # The output path must not be a directory before or after the write.
                    self.assertFalse(os.path.isdir(path))
                    field.write(path, driver=driver)
                    self.assertFalse(os.path.isdir(path))

                    in_field = RequestDataset(path, driver=driver).get()
                    self.assertEqual(in_field['data'].dimensions[0].size, 2)

            MPI_COMM.Barrier()
    MPI_COMM.Barrier()
def test_get_distributed_slice(self):
    """Check that repeated distributed slicing leaves the grid's global bounds unchanged."""
    path = None
    with vm.scoped('grid write', [0]):
        if MPI_RANK == 0:
            x = Variable('x', list(range(768)), 'x', float)
            y = Variable('y', list(range(768)), 'y', float)
            field = Field(grid=Grid(x, y))
            path = self.get_temporary_file_path('grid.nc')
            field.write(path)
    path = vm.bcast(path)

    grid = RequestDataset(path).get().grid
    # Snapshot taken before any slicing.
    expected_bounds = deepcopy([dim.bounds_global for dim in grid.dimensions])

    window = [slice(73, 157), slice(305, 386)]
    for _ in range(10):
        grid.get_distributed_slice(window)
        bounds_after = [dim.bounds_global for dim in grid.dimensions]
        self.assertEqual(expected_bounds, bounds_after)
def test_get_intersects_one_rank_with_mask(self):
    """Subset a two-rank grid with a polygon and count the non-zero mask entries written to file."""
    if MPI_SIZE != 2:
        raise SkipTest('MPI_SIZE != 2')

    # Each rank holds half of the x-coordinates.
    local_x = [1, 2] if MPI_RANK == 0 else [3, 4]

    layout = OcgDist()
    xdim = layout.create_dimension('x', 4, dist=True)
    ydim = layout.create_dimension('y', 5, dist=False)
    layout.update_dimension_bounds()

    x = Variable('x', value=local_x, dimensions=xdim)
    y = Variable('y', value=[1, 2, 3, 4, 5], dimensions=ydim)
    grid = Grid(x, y)

    wkt_geom = 'Polygon ((0.72993630573248502 5.22484076433120936, 0.70318471337579691 0.67707006369426814, 2.70063694267515952 0.69490445859872629, 2.59363057324840796 2.54076433121019107, 4.52866242038216527 2.51401273885350296, 4.40382165605095466 5.34968152866241908, 0.72993630573248502 5.22484076433120936))'
    subset = grid.get_intersects(wkt.loads(wkt_geom))

    path = self.get_temporary_file_path('foo.nc')
    Field(grid=subset).write(path)

    with vm.scoped('mask count', [0]):
        if not vm.is_null:
            out_field = RequestDataset(path).get()
            mask_values = out_field[out_field.grid._mask_name].get_value()
            nonzero = mask_values != 0
            self.assertEqual(nonzero.sum(), 4)
def test_get_distributed_slice_on_rank_subset(self):
    """Slice a distributed dimension twice, the second time scoped to the ranks that are still non-empty."""
    if MPI_SIZE != 4:
        raise SkipTest('MPI_SIZE != 4')

    layout = OcgDist()
    dim = layout.create_dimension('eight', 8, dist=True)
    layout.update_dimension_bounds()

    first_cut = dim.get_distributed_slice(slice(2, 6))
    live_ranks = get_nonempty_ranks(first_cut, vm)

    if MPI_RANK in (1, 2):
        self.assertFalse(first_cut.is_empty)
    else:
        # Empty results carry zeroed bounds.
        self.assertTrue(first_cut.is_empty)
        self.assertEqual(first_cut.bounds_local, (0, 0))
        self.assertEqual(first_cut.bounds_global, (0, 0))

    with vm.scoped('live rank dim subset', live_ranks):
        second_cut = None if vm.is_null else first_cut.get_distributed_slice(slice(2, 4))

    if MPI_RANK == 2:
        self.assertEqual(second_cut.bounds_local, (0, 2))
        self.assertEqual(second_cut.bounds_global, (0, 2))
        self.assertFalse(second_cut.is_empty)
    else:
        self.assertTrue(second_cut is None or second_cut.is_empty)

    # Without scoping, slicing an empty dimension must raise.
    if first_cut.is_empty:
        with self.assertRaises(EmptyObjectError):
            first_cut.get_distributed_slice(slice(2, 4))
def test_get_distributed_slice_on_rank_subset(self):
    """Slice a distributed dimension twice, the second time scoped to the ranks that are still non-empty."""
    if MPI_SIZE != 4:
        raise SkipTest('MPI_SIZE != 4')

    layout = OcgDist()
    dim = layout.create_dimension('eight', 8, dist=True)
    layout.update_dimension_bounds()

    first_cut = dim.get_distributed_slice(slice(2, 6))
    live_ranks = get_nonempty_ranks(first_cut, vm)

    if MPI_RANK in (1, 2):
        self.assertFalse(first_cut.is_empty)
    else:
        # Empty results carry zeroed bounds.
        self.assertTrue(first_cut.is_empty)
        self.assertEqual(first_cut.bounds_local, (0, 0))
        self.assertEqual(first_cut.bounds_global, (0, 0))

    with vm.scoped('live rank dim subset', live_ranks):
        second_cut = None if vm.is_null else first_cut.get_distributed_slice(slice(2, 4))

    if MPI_RANK == 2:
        self.assertEqual(second_cut.bounds_local, (0, 2))
        self.assertEqual(second_cut.bounds_global, (0, 2))
        self.assertFalse(second_cut.is_empty)
    else:
        self.assertTrue(second_cut is None or second_cut.is_empty)

    # Without scoping, slicing an empty dimension must raise.
    if first_cut.is_empty:
        with self.assertRaises(EmptyObjectError):
            first_cut.get_distributed_slice(slice(2, 4))
def test_write_parallel(self):
    """Write a small field from each rank in turn with each driver and read it back on that rank."""
    if MPI_SIZE not in (1, 3):
        raise SkipTest('MPI_SIZE != 1 or 3')

    drivers = (DriverCSV, DriverVector, DriverNetcdf)
    for base_rank in range(MPI_SIZE):
        for driver in drivers:
            path = None
            if MPI_RANK == 0:
                filename = '{}-{}.{}'.format(driver.key, base_rank, driver.common_extension)
                path = self.get_temporary_file_path(filename)
            path = MPI_COMM.bcast(path)

            with vm.scoped('field write by rank', [base_rank]):
                if not vm.is_null:
                    points = GeometryVariable(value=[Point(1, 2), Point(3, 4)], name='geom',
                                              dimensions='geom')
                    payload = Variable(name='data', value=[10, 20], dimensions='geom')
                    field = Field(geom=points)
                    field.add_variable(payload, is_data=True)

                    # The output path must not be a directory before or after the write.
                    self.assertFalse(os.path.isdir(path))
                    field.write(path, driver=driver)
                    self.assertFalse(os.path.isdir(path))

                    in_field = RequestDataset(path, driver=driver).get()
                    self.assertEqual(in_field['data'].dimensions[0].size, 2)

            MPI_COMM.Barrier()
    MPI_COMM.Barrier()
def get_distributed_slice(self, slc):
    """
    Slice the dimension in parallel. The sliced dimension object is a shallow copy. The returned dimension may be
    empty.

    A full slice (``slice(None)``) returns a copy and a non-distributed dimension is sliced directly. For a
    distributed dimension the slice is applied locally, the local sizes are gathered to compute the new global
    bounds, and the local bounds on ranks with a non-empty result are offset by the accumulated length of the
    preceding non-empty ranks. That branch calls ``vm.gather``, ``vm.bcast`` and ``vm.barrier``.

    :param slc: A :class:`slice`-like object or a fancy slice. If this is a fancy slice, ``slc`` must be
     processor-local. If the fancy slice uses integer indices, the indices must be local. In other words, a fancy
     ``slc`` is not manipulated or redistributed prior to slicing.
    :rtype: :class:`~ocgis.Dimension`
    :raises: :class:`~ocgis.exc.EmptyObjectError`
    """
    # NOTE(review): nesting below was reconstructed from a whitespace-collapsed source -- confirm against the
    # original file.
    raise_if_empty(self)

    # Only the first (and only) axis of the formatted slice is needed.
    slc = get_formatted_slice(slc, 1)[0]
    # Anything that is not a plain ``slice`` object is treated as a fancy slice.
    is_fancy = not isinstance(slc, slice)

    if not is_fancy and slc == slice(None):
        ret = self.copy()
    # Use standard slicing for non-distributed dimensions.
    elif not self.dist:
        ret = self[slc]
    else:
        if is_fancy:
            # Fancy slices are used as given.
            local_slc = slc
        else:
            # Translate the requested range into this rank's local bounds.
            local_slc = get_global_to_local_slice((slc.start, slc.stop), self.bounds_local)
            if local_slc is not None:
                local_slc = slice(*local_slc)
        # Slice does not overlap local bounds. The dimension is now empty with size 0.
        if local_slc is None:
            ret = self.copy()
            ret.convert_to_empty()
            dimension_size = 0
        # Slice overlaps so do a slice on the dimension using the local slice.
        else:
            ret = self[local_slc]
            dimension_size = len(ret)
        assert dimension_size >= 0
        dimension_sizes = vm.gather(dimension_size)
        if vm.rank == 0:
            # The new global extent is the sum of the gathered local sizes.
            sum_dimension_size = 0
            for ds in dimension_sizes:
                try:
                    sum_dimension_size += ds
                except TypeError:
                    # Entries that cannot be added to an integer are skipped.
                    pass
            bounds_global = (0, sum_dimension_size)
        else:
            bounds_global = None
        bounds_global = vm.bcast(bounds_global)
        if not ret.is_empty:
            ret.bounds_global = bounds_global

        # Normalize the local bounds on live ranks.
        inner_live_ranks = get_nonempty_ranks(ret, vm)
        with vm.scoped('bounds normalization', inner_live_ranks):
            if not vm.is_null:
                # Rank 0 of the scope seeds the running offset with its own length.
                if vm.rank == 0:
                    adjust = len(ret)
                else:
                    adjust = None
                adjust = vm.bcast(adjust)
                # Each rank in turn offsets its bounds, then broadcasts the updated offset.
                for current_rank in vm.ranks:
                    if vm.rank == current_rank:
                        if vm.rank != 0:
                            ret.bounds_local = [b + adjust for b in ret.bounds_local]
                            adjust += len(ret)
                    vm.barrier()
                    adjust = vm.bcast(adjust, root=current_rank)
    return ret
def write_subsets(self, src_template, dst_template, wgt_template, index_path):
    """
    Write each source/destination grid subset to netCDF and then write an index file describing them.

    Each template is formatted with the subset counter, which starts at one. ``src_template`` and ``dst_template``
    are full file paths containing one curly-brace pair. ``wgt_template`` is a bare filename, recorded in the
    index for later weight generation.

    >>> template_example = '/path/to/data_{}.nc'

    :param str src_template: The template for the source subset file.
    :param str dst_template: The template for the destination subset file.
    :param str wgt_template: The template for the weight filename.

    >>> wgt_template = 'esmf_weights_{}.nc'

    :param index_path: Path to the output indexing netCDF.
    """
    src_names, dst_names, wgt_names, slices = [], [], [], []

    subsets = self.iter_src_grid_subsets(yield_dst=True)
    for counter, (src_grid, dst_grid, dst_slice) in enumerate(subsets, start=1):
        src_path = src_template.format(counter)
        dst_path = dst_template.format(counter)
        wgt_name = wgt_template.format(counter)

        # The index file stores file names only, not directories.
        src_names.append(os.path.basename(src_path))
        dst_names.append(os.path.basename(dst_path))
        wgt_names.append(wgt_name)
        slices.append(dst_slice)

        for grid, out_path in ((src_grid, src_path), (dst_grid, dst_path)):
            # An empty grid is replaced by an empty field without a grid.
            if grid.is_empty:
                field = Field(grid=None, is_empty=True)
            else:
                field = Field(grid=grid, is_empty=False)
            ocgis_lh(msg='writing: {}'.format(out_path), level=logging.DEBUG)
            with vm.scoped_by_emptyable('field.write', field):
                if not vm.is_null:
                    field.write(out_path)
            ocgis_lh(msg='finished writing: {}'.format(out_path), level=logging.DEBUG)

    with vm.scoped('index write', [0]):
        if not vm.is_null:
            index_constants = GridSplitterConstants.IndexFile
            nfiles = Dimension('nfiles', len(src_names))
            destination_role = index_constants.NAME_DESTINATION_VARIABLE

            collection = VariableCollection()

            # The index variable's attributes name the variables holding each piece of information.
            index_name = index_constants.NAME_INDEX_VARIABLE
            index_var = Variable(name=index_name)
            index_var.attrs['esmf_role'] = index_name
            index_var.attrs['grid_splitter_source'] = 'source_filename'
            index_var.attrs[index_constants.NAME_DESTINATION_VARIABLE] = 'destination_filename'
            index_var.attrs['grid_splitter_weights'] = 'weights_filename'
            x_bounds = index_constants.NAME_X_BOUNDS_VARIABLE
            index_var.attrs[x_bounds] = x_bounds
            y_bounds = index_constants.NAME_Y_BOUNDS_VARIABLE
            index_var.attrs[y_bounds] = y_bounds
            collection.add_variable(index_var)

            # One string variable per filename list.
            filename_specs = zip(
                ['source_filename', 'destination_filename', 'weights_filename'],
                [src_names, dst_names, wgt_names],
                [{'esmf_role': 'grid_splitter_source'},
                 {'esmf_role': destination_role},
                 {'esmf_role': 'grid_splitter_weights'}])
            for name, filenames, role in filename_specs:
                collection.add_variable(
                    Variable(name=name, dimensions=nfiles, dtype=str, value=filenames, attrs=role))

            # Start/stop of every destination slice along x and y.
            pair = Dimension(name='bounds', size=2)
            xb = Variable(name=x_bounds, dimensions=[nfiles, pair],
                          attrs={'esmf_role': 'x_split_bounds'}, dtype=int)
            yb = Variable(name=y_bounds, dimensions=[nfiles, pair],
                          attrs={'esmf_role': 'y_split_bounds'}, dtype=int)
            x_name = self.dst_grid.x.dimensions[0].name
            y_name = self.dst_grid.y.dimensions[0].name
            for row, dst_slice in enumerate(slices):
                xb.get_value()[row, :] = dst_slice[x_name].start, dst_slice[x_name].stop
                yb.get_value()[row, :] = dst_slice[y_name].start, dst_slice[y_name].stop
            collection.add_variable(xb)
            collection.add_variable(yb)

            collection.write(index_path)

    vm.barrier()
def test_system_converting_state_boundaries_shapefile(self):
    """Convert state boundary geometries to coordinate form, write them with the ESMF unstructured driver and
    compare the geometries read back on rank 0.

    The test loops over the product of ``transform_to_crs`` and ``use_geometry_iterator`` keyword values.
    """
    # NOTE(review): nesting was reconstructed from a whitespace-collapsed source. In particular, confirm whether
    # the final coordinate comparison belongs inside or after the keyword loop.
    # Flip to True to print progress markers between steps.
    verbose = False
    if verbose:
        ocgis.vm.barrier_print("starting test")
    ocgis.env.USE_NETCDF4_MPI = False  # tdk:RELEASE:FIX: this hangs in the STATE_FIPS write for asynch might be nc4 bug...

    keywords = {
        'transform_to_crs': [None, Spherical],
        'use_geometry_iterator': [False, True]
    }
    # Coordinate sums collected per keyword combination for the final comparison.
    actual_xsums = []
    actual_ysums = []
    for k in self.iter_product_keywords(keywords):
        # A target CRS is only passed to the conversion when the geometry iterator is used.
        if k.use_geometry_iterator and k.transform_to_crs is not None:
            to_crs = k.transform_to_crs()
        else:
            to_crs = None
        if k.transform_to_crs is None:
            desired_crs = WGS84()
        else:
            desired_crs = k.transform_to_crs()

        rd = RequestDataset(uri=self.path_state_boundaries, variable=['UGID', 'ID'])
        rd.metadata['schema']['geometry'] = 'MultiPolygon'
        field = rd.get()
        self.assertEqual(len(field.data_variables), 2)

        # Test there is no mask present.
        if verbose:
            ocgis.vm.barrier_print("before geom.load()")
        field.geom.load()
        if verbose:
            ocgis.vm.barrier_print("after geom.load()")
        self.assertFalse(field.geom.has_mask)
        self.assertNotIn(VariableName.SPATIAL_MASK, field)
        self.assertIsNone(field.dimension_map.get_spatial_mask())

        self.assertEqual(field.crs, WGS84())
        if k.transform_to_crs is not None:
            field.update_crs(desired_crs)
        self.assertEqual(len(field.data_variables), 2)
        self.assertEqual(len(field.geom.parent.data_variables), 2)
        if verbose:
            ocgis.vm.barrier_print("starting conversion")
        try:
            gc = field.geom.convert_to(
                pack=False,
                use_geometry_iterator=k.use_geometry_iterator,
                to_crs=to_crs)
        except ValueError as e:
            # A ValueError is tolerated only when no geometry iterator is used and a target CRS is set; that
            # combination is skipped. Any other ValueError is re-raised.
            try:
                self.assertFalse(k.use_geometry_iterator)
                self.assertIsNotNone(to_crs)
            except AssertionError:
                raise e
            else:
                continue
        if verbose:
            ocgis.vm.barrier_print("after conversion")

        actual_xsums.append(gc.x.get_value().sum())
        actual_ysums.append(gc.y.get_value().sum())
        self.assertEqual(gc.crs, desired_crs)

        # Test there is no mask present after conversion to geometry coordinates.
        self.assertFalse(gc.has_mask)
        self.assertNotIn(VariableName.SPATIAL_MASK, gc.parent)
        self.assertIsNone(gc.dimension_map.get_spatial_mask())

        path = self.get_temporary_file_path('esmf_state_boundaries.nc')
        self.assertEqual(gc.parent.crs, desired_crs)
        gc.parent.write(path, driver=DriverKey.NETCDF_ESMF_UNSTRUCT)
        if verbose:
            ocgis.vm.barrier_print("after gc.parent.write")

        gathered_geoms = vm.gather(field.geom.get_value())
        if verbose:
            ocgis.vm.barrier_print("after gathered_geoms")

        with vm.scoped("gather test", [0]):
            if not vm.is_null:
                # Flatten the per-rank geometry arrays into a single list.
                actual_geoms = []
                for g in gathered_geoms:
                    actual_geoms.extend(g)

                rd = RequestDataset(path, driver=DriverKey.NETCDF_ESMF_UNSTRUCT)
                infield = rd.get()
                self.assertEqual(create_crs(infield.crs.value), desired_crs)
                for dv in field.data_variables:
                    self.assertIn(dv.name, infield)
                ingrid = infield.grid
                self.assertIsInstance(ingrid, GridUnstruct)

                # g[0] is used as the index into the gathered geometries, g[1] as the geometry to compare.
                for g in ingrid.archetype.iter_geometries():
                    self.assertPolygonSimilar(g[1], actual_geoms[g[0]], check_type=False)
        if verbose:
            ocgis.vm.barrier_print("after gathered_geoms testing")
        vm.barrier()

    # Test coordinates have actually changed.
    if verbose:
        ocgis.vm.barrier_print("before use_geometry_iterator test")
    if not k.use_geometry_iterator:
        # Pairwise comparison of the collected sums: x-sums (ctr == 0) must agree, y-sums must differ.
        for ctr, to_test in enumerate([actual_xsums, actual_ysums]):
            for lhs, rhs in itertools.combinations(to_test, 2):
                if ctr == 0:
                    self.assertAlmostEqual(lhs, rhs)
                else:
                    self.assertNotAlmostEqual(lhs, rhs)
def test_write_esmf_weights(self):
    """
    Identity regridding test: chunk a field, generate weights, apply them and rebuild the global file.

    The source and destination fields are equivalent, so the data reconstructed in the master destination file
    must match the data written from the source field.
    """
    # Create source and destination fields. This is the identity test, so the source and destination fields are
    # equivalent.
    src_grid = create_gridxy_global(resolution=3.0, crs=Spherical())

    # Only test masking in serial to make indexing easier...just being lazy
    if vm.size == 1:
        mask = src_grid.get_mask(create=True)
        mask[4, 5] = True
        mask[25, 27] = True
        src_grid.set_mask(mask)
        self.assertEqual(src_grid.get_mask().sum(), 2)

    src_field = create_exact_field(src_grid, 'foo', ntime=3)
    dst_field = deepcopy(src_field)

    # Write the fields to disk for use in global file reconstruction and testing. Rank 0 chooses the paths and
    # shares them so every rank writes to the same files.
    if vm.rank == 0:
        master_path = self.get_temporary_file_path('foo.nc')
        src_field_path = self.get_temporary_file_path('src_field.nc')
    else:
        master_path = None
        src_field_path = None
    master_path = vm.bcast(master_path)
    src_field_path = vm.bcast(src_field_path)

    # Use a unittest assertion rather than a bare ``assert`` so the check survives ``python -O`` and is
    # reported through the test framework.
    self.assertFalse(os.path.exists(master_path))
    dst_field.write(master_path)
    src_field.write(src_field_path)

    # Remove the destination data variable to test its creation and filling
    dst_field.remove_variable('foo')

    # Chunk the fields and generate weights
    paths = {'wd': self.current_dir_output}
    gc = GridChunker(src_field, dst_field, nchunks_dst=(2, 2), genweights=True, paths=paths,
                     esmf_kwargs={'regrid_method': 'BILINEAR'})
    gc.write_chunks()

    # This is the path to the index file describing how to reconstruct the grid file
    index_path = os.path.join(self.current_dir_output, gc.paths['index_file'])

    # Execute the sparse matrix multiplication using weights read from file
    gc.smm(index_path, paths['wd'])

    with vm.scoped('index and reconstruct', [0]):
        if not vm.is_null:
            # Reconstruct the global destination file
            gc.insert_weighted(index_path, self.current_dir_output, master_path)

            # Load the actual values from file (destination)
            actual_field = RequestDataset(master_path).create_field()
            actual = actual_field.data_variables[0].mv()

            # Load the desired data from file (original values in the source field)
            desired = RequestDataset(src_field_path).create_field().data_variables[0].mv()

            if vm.size_global == 1:
                # Masking is only tested in serial
                self.assertEqual(actual_field.grid.get_mask().sum(), 2)
            else:
                self.assertIsNone(actual_field.grid.get_mask())

            self.assertNumpyAll(actual, desired)
def write(self):
    """
    Write every collection produced by iterating this converter, followed by any auxiliary files.

    The first collection is written with an unset write mode so the driver can decide how to create the
    output; later collections are appended. User geometries, when enabled, are written from rank 0 only.

    :returns: The value produced by ``self._get_return_()``.
    """
    ocgis_lh('starting write method', self._log, logging.DEBUG)

    # Keyword arguments forwarded to every collection write; seeded with the output path.
    write_kwargs = {KeywordArgument.PATH: self.path}

    # Becomes True on the first pass when user geometries should be written to file.
    write_ugeom = False

    first_pass = True
    for idx, coll in enumerate(self):
        if first_pass:
            # During the build pass, create the file and write the first series of records. Let the drivers
            # determine the appropriate write modes for handling parallelism.
            write_mode = None

            # Write the user geometries if selected and there is one present on the incoming collection.
            if self._add_ugeom and coll.has_container_geometries:
                write_ugeom = True

            if write_ugeom:
                if vm.rank == 0:
                    # The output file name for the user geometries, optionally nested in the shapefile folder.
                    ugid_shp_name = self.prefix + '_ugid.shp'
                    shp_dir = self._get_or_create_shp_folder_() if self._add_ugeom_nest else self.outdir
                    ugeom_fiona_path = os.path.join(shp_dir, ugid_shp_name)
                else:
                    ugeom_fiona_path = None

            first_pass = False
        else:
            write_mode = MPIWriteMode.APPEND

        write_kwargs[KeywordArgument.WRITE_MODE] = write_mode
        self._write_coll_(write_kwargs, self._preformatting_(idx, coll))

        if write_ugeom:
            with vm.scoped(SubcommName.UGEOM_WRITE, [0]):
                if not vm.is_null:
                    for subset_field in list(coll.children.values()):
                        subset_field.write(ugeom_fiona_path, write_mode=write_mode, driver=DriverVector)

    # The metadata and dataset descriptor files may only be written if OCGIS operations are present.
    ops = self.ops
    if ops is not None and self.add_auxiliary_files and MPI_RANK == 0:
        # Add OCGIS metadata output if requested.
        if self.add_meta:
            ocgis_lh('adding OCGIS metadata file', 'conv', logging.DEBUG)
            from ocgis.conv.meta import MetaOCGISConverter

            lines = MetaOCGISConverter(ops).write()
            meta_name = self.prefix + '_' + MetaOCGISConverter._meta_filename
            out_path = os.path.join(self.outdir, meta_name)
            with open(out_path, 'w') as meta_file:
                meta_file.write(lines)

        # Add the dataset descriptor file if requested.
        if self._add_did_file:
            ocgis_lh('writing dataset description (DID) file', 'conv', logging.DEBUG)
            did_path = os.path.join(self.outdir, self.prefix + '_did.csv')
            _write_dataset_identifier_file_(did_path, ops)

        # Add source metadata if requested.
        if self._add_source_meta:
            ocgis_lh('writing source metadata file', 'conv', logging.DEBUG)
            source_meta_path = os.path.join(self.outdir, self.prefix + '_source_metadata.txt')
            _write_source_meta_(source_meta_path, ops)

    # Return the internal path unless overloaded by subclasses.
    return self._get_return_()
def test_system_spatial_averaging_through_operations_state_boundaries(self):
    """
    Run spatial averaging over the state boundaries through ``OcgOperations`` on eight ranks.

    Every combination of spatial operation, aggregation, wrapping and output format is executed. Depending on
    the combination, the test checks clipped areas, the presence of the overview shapefiles, the number of
    unique geometry keys and the absolute error of aggregated values against the exact field values.

    :raises SkipTest: If the test is not running on exactly eight ranks.
    """
    if MPI_SIZE != 8:
        raise SkipTest('MPI_SIZE != 8')

    ntime = 3

    # Get the exact field value for the state's representative center. The scope is named like every other
    # ``vm.scoped`` call in this file, which passes ``(name, ranks)``.
    with vm.scoped('exact state values', [0]):
        if MPI_RANK == 0:
            states = RequestDataset(self.path_state_boundaries, driver='vector').get()
            states.update_crs(env.DEFAULT_COORDSYS)
            fill = np.zeros((states.geom.shape[0], 2))
            for idx, geom in enumerate(states.geom.get_value().flat):
                centroid = geom.centroid
                fill[idx, :] = centroid.x, centroid.y
            exact_states = create_exact_field_value(fill[:, 0], fill[:, 1])
            state_ugid = states['UGID'].get_value()
            area = states.geom.area

    keywords = {
        'spatial_operation': ['clip', 'intersects'],
        'aggregate': [True, False],
        'wrapped': [True, False],
        'output_format': [OutputFormatName.OCGIS, 'csv', 'csv-shp', 'shp'],
    }

    for ctr, k in enumerate(self.iter_product_keywords(keywords)):
        # Rank 0 writes the source field; the path is then shared with every rank.
        with vm.scoped('source field write', [0]):
            if vm.is_root:
                grid = create_gridxy_global(resolution=1.0, dist=False, wrapped=k.wrapped)
                field = create_exact_field(grid, 'foo', ntime=ntime)
                path = self.get_temporary_file_path('foo.nc')
                field.write(path)
            else:
                path = None
        path = MPI_COMM.bcast(path)

        rd = RequestDataset(path)

        ops = OcgOperations(dataset=rd, geom='state_boundaries', spatial_operation=k.spatial_operation,
                            aggregate=k.aggregate, output_format=k.output_format, prefix=str(ctr))
        ret = ops.execute()

        # Test area is preserved for a problem element during union. The union's geometry was not fully
        # represented in the output.
        if k.output_format == 'shp' and k.aggregate and k.spatial_operation == 'clip':
            with vm.scoped('union area check', [0]):
                if vm.is_root:
                    inn = RequestDataset(ret).get()
                    inn_ugid_idx = np.where(inn['UGID'].get_value() == 8)[0][0]
                    ugid_idx = np.where(state_ugid == 8)[0][0]
                    self.assertAlmostEqual(inn.geom.get_value()[inn_ugid_idx].area, area[ugid_idx], places=2)

        # Test the overview geometry shapefile is written.
        if k.output_format == 'shp':
            directory = os.path.split(ret)[0]
            contents = os.listdir(directory)
            actual = ['_ugid.shp' in c for c in contents]
            self.assertTrue(any(actual))
        elif k.output_format == 'csv-shp':
            directory = os.path.split(ret)[0]
            directory = os.path.join(directory, 'shp')
            contents = os.listdir(directory)
            actual = ['_ugid.shp' in c for c in contents]
            self.assertTrue(any(actual))
            if not k.aggregate:
                actual = ['_gid.shp' in c for c in contents]
                self.assertTrue(any(actual))

        if k.output_format == OutputFormatName.OCGIS:
            # Materialize the keys first: on Python 3 ``keys()`` is a view and ``np.array`` would wrap the
            # view object itself in a 0-d object array instead of building an array of keys.
            geom_keys = list(ret.children.keys())
            all_geom_keys = vm.gather(np.array(geom_keys))
            if vm.is_root:
                all_geom_keys = hgather(all_geom_keys)
                self.assertEqual(len(np.unique(all_geom_keys)), 51)

            if k.aggregate:
                # Collect the aggregated value and geometry area per user geometry identifier on each rank.
                actual = Dict()
                for field, container in ret.iter_fields(yield_container=True):
                    if not field.is_empty:
                        ugid = container.geom.ugid.get_value()[0]
                        actual[ugid]['actual'] = field.data_variables[0].get_value()
                        actual[ugid]['area'] = container.geom.area[0]

                actual = vm.gather(actual)

                if vm.is_root:
                    actual = dgather(actual)

                    ares = []
                    actual_areas = []
                    for ugid_key, v in actual.items():
                        ugid_idx = np.where(state_ugid == ugid_key)[0][0]
                        desired = exact_states[ugid_idx]
                        actual_areas.append(v['area'])
                        for tidx in range(ntime):
                            # Absolute error against the exact value offset by ten per time step.
                            are = np.abs((desired + ((tidx + 1) * 10)) - v['actual'][tidx, 0])
                            ares.append(are)

                    if k.spatial_operation == 'clip':
                        diff = np.abs(np.array(area) - np.array(actual_areas))
                        self.assertLess(np.max(diff), 1e-6)
                        self.assertLess(np.mean(diff), 1e-6)

                    # Test relative errors.
                    self.assertLess(np.max(ares), 0.031)
                    self.assertLess(np.mean(ares), 0.009)
def write_subsets(self):
    """
    Write grid subsets to netCDF files using the provided filename templates.

    After the subsets are written, rank 0 writes an index file recording the source, destination and weights
    filenames together with the global grid shapes and the index bounds of each subset.
    """
    src_filenames, dst_filenames, wgt_filenames = [], [], []
    src_slices, dst_slices = [], []
    index_path = self.create_full_path_from_template('index_file')

    for ctr, (sub_src, src_slc, sub_dst, dst_slc) in enumerate(self.iter_src_grid_subsets(yield_dst=True),
                                                               start=1):
        src_path = self.create_full_path_from_template('src_template', index=ctr)
        dst_path = self.create_full_path_from_template('dst_template', index=ctr)
        wgt_path = self.create_full_path_from_template('wgt_template', index=ctr)

        # Source and destination entries keep only the file name; the weights entry keeps the path as created.
        src_filenames.append(os.path.split(src_path)[1])
        dst_filenames.append(os.path.split(dst_path)[1])
        wgt_filenames.append(wgt_path)
        dst_slices.append(dst_slc)
        src_slices.append(src_slc)

        # Only write destinations if an iterator is not provided.
        if self.iter_dst is None:
            to_write = [(sub_src, src_path), (sub_dst, dst_path)]
        else:
            to_write = [(sub_src, src_path)]

        for target, path in to_write:
            with vm.scoped_by_emptyable('field.write', target):
                if not vm.is_null:
                    ocgis_lh(msg='writing: {}'.format(path), level=logging.DEBUG)
                    Field(grid=target).write(path)
                    ocgis_lh(msg='finished writing: {}'.format(path), level=logging.DEBUG)

    # Global shapes require a VM global scope to collect.
    src_global_shape = global_grid_shape(self.src_grid)
    dst_global_shape = global_grid_shape(self.dst_grid)

    # Gather and collapse source slices as some may be empty and we write on rank 0. For each subset the
    # first slice that is not None across the gathered lists is kept.
    gathered_src_grid_slice = vm.gather(src_slices)
    if vm.rank == 0:
        src_slices = [
            next((per_rank[idx] for per_rank in gathered_src_grid_slice if per_rank[idx] is not None), None)
            for idx in range(len(src_slices))
        ]

    with vm.scoped('index write', [0]):
        if not vm.is_null:
            index_constants = GridSplitterConstants.IndexFile
            dim = Dimension('nfiles', len(src_filenames))

            vc = VariableCollection()

            # The index variable points at the filename variables and carries the global grid shapes.
            vidx = Variable(name=index_constants.NAME_INDEX_VARIABLE)
            vidx.attrs['esmf_role'] = index_constants.NAME_INDEX_VARIABLE
            vidx.attrs['grid_splitter_source'] = 'source_filename'
            vidx.attrs[index_constants.NAME_DESTINATION_VARIABLE] = 'destination_filename'
            vidx.attrs['grid_splitter_weights'] = 'weights_filename'
            vidx.attrs[index_constants.NAME_SRC_GRID_SHAPE] = src_global_shape
            vidx.attrs[index_constants.NAME_DST_GRID_SHAPE] = dst_global_shape
            vc.add_variable(vidx)

            # One string variable per filename list, each tagged with its role.
            filename_variables = [
                ('source_filename', src_filenames, 'grid_splitter_source'),
                ('destination_filename', dst_filenames, index_constants.NAME_DESTINATION_VARIABLE),
                ('weights_filename', wgt_filenames, 'grid_splitter_weights'),
            ]
            for var_name, var_value, esmf_role in filename_variables:
                vc.add_variable(Variable(name=var_name, dimensions=dim, dtype=str, value=var_value,
                                         attrs={'esmf_role': esmf_role}))

            bounds_dimension = Dimension(name='bounds', size=2)
            # TODO: This needs to work with four dimensions.
            self.src_grid._gs_create_index_bounds_(RegriddingRole.SOURCE, vidx, vc, src_slices, dim,
                                                   bounds_dimension)
            self.dst_grid._gs_create_index_bounds_(RegriddingRole.DESTINATION, vidx, vc, dst_slices, dim,
                                                   bounds_dimension)

            vc.write(index_path)

    vm.barrier()
def write(self):
    """
    Write every collection produced by iterating this converter, followed by any auxiliary files.

    The first collection is written with an unset write mode so the driver can decide how to create the
    output; later collections are appended. User geometries, when enabled, are written from rank 0 only.

    :returns: The value produced by ``self._get_return_()``.
    """
    ocgis_lh('starting write method', self._log, logging.DEBUG)

    # Keyword arguments forwarded to every collection write; seeded with the output path.
    write_kwargs = {KeywordArgument.PATH: self.path}

    # Becomes True on the first pass when user geometries should be written to file.
    write_ugeom = False

    first_pass = True
    for idx, coll in enumerate(self):
        if first_pass:
            # During the build pass, create the file and write the first series of records. Let the drivers
            # determine the appropriate write modes for handling parallelism.
            write_mode = None

            # Write the user geometries if selected and there is one present on the incoming collection.
            if self._add_ugeom and coll.has_container_geometries:
                write_ugeom = True

            if write_ugeom:
                if vm.rank == 0:
                    # The output file name for the user geometries, optionally nested in the shapefile folder.
                    ugid_shp_name = self.prefix + '_ugid.shp'
                    shp_dir = self._get_or_create_shp_folder_() if self._add_ugeom_nest else self.outdir
                    ugeom_fiona_path = os.path.join(shp_dir, ugid_shp_name)
                else:
                    ugeom_fiona_path = None

            first_pass = False
        else:
            write_mode = MPIWriteMode.APPEND

        write_kwargs[KeywordArgument.WRITE_MODE] = write_mode
        self._write_coll_(write_kwargs, self._preformatting_(idx, coll))

        if write_ugeom:
            with vm.scoped(SubcommName.UGEOM_WRITE, [0]):
                if not vm.is_null:
                    for subset_field in list(coll.children.values()):
                        subset_field.write(ugeom_fiona_path, write_mode=write_mode, driver=DriverVector)

    # The metadata and dataset descriptor files may only be written if OCGIS operations are present.
    ops = self.ops
    if ops is not None and self.add_auxiliary_files and MPI_RANK == 0:
        # Add OCGIS metadata output if requested.
        if self.add_meta:
            ocgis_lh('adding OCGIS metadata file', 'conv', logging.DEBUG)
            from ocgis.conv.meta import MetaOCGISConverter

            lines = MetaOCGISConverter(ops).write()
            meta_name = self.prefix + '_' + MetaOCGISConverter._meta_filename
            out_path = os.path.join(self.outdir, meta_name)
            with open(out_path, 'w') as meta_file:
                meta_file.write(lines)

        # Add the dataset descriptor file if requested.
        if self._add_did_file:
            ocgis_lh('writing dataset description (DID) file', 'conv', logging.DEBUG)
            did_path = os.path.join(self.outdir, self.prefix + '_did.csv')
            _write_dataset_identifier_file_(did_path, ops)

        # Add source metadata if requested.
        if self._add_source_meta:
            ocgis_lh('writing source metadata file', 'conv', logging.DEBUG)
            source_meta_path = os.path.join(self.outdir, self.prefix + '_source_metadata.txt')
            _write_source_meta_(source_meta_path, ops)

    # Return the internal path unless overloaded by subclasses.
    return self._get_return_()
def write(self):
    """
    Write each single-geometry collection as one entry along the unioned geometry dimension.

    For every field in a collection the ``lon``/``lat`` variables are replaced by the centroid of the
    collection's geometry, a one-cell point grid is attached, the geometry properties are added as variables,
    and the unioned geometry dimension is resized to the number of geometries with this collection occupying
    index ``i``. The collection is then written, followed by any user geometries and auxiliary files.

    :returns: The value produced by ``self._get_return_()``.
    """
    ocgis_lh('starting write method', self._log, logging.DEBUG)

    # Indicates if user geometries should be written to file.
    write_ugeom = False

    # Final size of the unioned geometry dimension.
    ncoll = len(self.ops.geom)

    build = True
    for i, coll in enumerate(self):
        # Materialize the keys before indexing: on Python 3 ``keys()`` returns a view, which does not
        # support ``[0]``.
        ugids = list(coll.properties.keys())
        assert len(ugids) == 1
        ugid = ugids[0]

        # Geometry centroid location
        lon, lat = coll.geoms[ugid].centroid.xy

        for field in coll.iter_fields():
            lon_attrs = field.x.attrs.copy()
            lat_attrs = field.y.attrs.copy()

            # Removed for now. It'd be nice to find an elegant way to retain those.
            field.remove_variable('lat')
            field.remove_variable('lon')

            # Create new lon and lat variables
            field.add_variable(
                ocgis.Variable('lon',
                               value=lon,
                               dimensions=(DimensionName.UNIONED_GEOMETRY,),
                               attrs=dict(lon_attrs, **{'long_name': 'Centroid longitude'}))
            )
            field.add_variable(
                ocgis.Variable('lat',
                               value=lat,
                               dimensions=(DimensionName.UNIONED_GEOMETRY,),
                               attrs=dict(lat_attrs, **{'long_name': 'Centroid latitude'}))
            )

            if 'ocgis_spatial_mask' in field:
                # Remove the spatial_mask and replace by new one.
                field.remove_variable('ocgis_spatial_mask')

            grid = ocgis.Grid(field['lon'], field['lat'], abstraction='point', crs=field.crs, parent=field)
            grid.set_mask([[False, ]])
            field.set_grid(grid)

            # Geometry variables from the geom properties dict
            # There is no metadata for those...
            dm = get_data_model(self.ops)
            for key, val in coll.properties[ugid].items():
                if np.issubdtype(type(val), int):
                    dt = get_dtype('int', dm)
                elif np.issubdtype(type(val), float):
                    dt = get_dtype('float', dm)
                else:
                    dt = 'auto'
                field.add_variable(
                    ocgis.Variable(key, value=[val, ], dtype=dt, dimensions=(DimensionName.UNIONED_GEOMETRY,)))

            # ------------------ Dimension update ------------------------ #
            # Modify the dimensions for the number of geometries
            gdim = field.dimensions[DimensionName.UNIONED_GEOMETRY]
            gdim.set_size(ncoll)

            # This collection occupies index ``i`` along the resized dimension.
            for var in field.iter_variables_by_dimensions([gdim]):
                d = var.dimensions_dict[DimensionName.UNIONED_GEOMETRY]
                d.bounds_local = (i, i + 1)
            # ------------------------------------------------------------ #

            # CF-Conventions
            # Can this be anything else than a timeseries_id
            # Options are timeseries_id, profile_id, trajectory_id
            gid = field[HeaderName.ID_GEOMETRY]
            gid.attrs['cf_role'] = 'timeseries_id'

            # TODO: Hard-code the name in constants.py
            gdim.set_name('region')

        # Path to the output object.
        # I needed to put it here because _write_archetype pops it, so it's not available after the first loop.
        f = {KeywordArgument.PATH: self.path}

        # This will be changed to "write" if we are on the build loop.
        write_mode = MPIWriteMode.APPEND

        if build:
            # During a build loop, create the file and write the first series of records. Let the drivers
            # determine the appropriate write modes for handling parallelism.
            write_mode = None

            # Write the user geometries if selected and there is one present on the incoming collection.
            if self._add_ugeom and coll.has_container_geometries:
                write_ugeom = True

            if write_ugeom:
                if vm.rank == 0:
                    # The output file name for the user geometries.
                    ugid_shp_name = self.prefix + '_ugid.shp'
                    if self._add_ugeom_nest:
                        ugeom_fiona_path = os.path.join(self._get_or_create_shp_folder_(), ugid_shp_name)
                    else:
                        ugeom_fiona_path = os.path.join(self.outdir, ugid_shp_name)
                else:
                    ugeom_fiona_path = None

            build = False

        f[KeywordArgument.WRITE_MODE] = write_mode
        self._write_coll_(f, coll)

        if write_ugeom:
            with vm.scoped(SubcommName.UGEOM_WRITE, [0]):
                if not vm.is_null:
                    for subset_field in list(coll.children.values()):
                        subset_field.write(ugeom_fiona_path, write_mode=write_mode, driver=DriverVector)

    # The metadata and dataset descriptor files may only be written if OCGIS operations are present.
    ops = self.ops
    if ops is not None and self.add_auxiliary_files and MPI_RANK == 0:
        # Add OCGIS metadata output if requested.
        if self.add_meta:
            ocgis_lh('adding OCGIS metadata file', 'conv', logging.DEBUG)
            from ocgis.conv.meta import MetaOCGISConverter

            lines = MetaOCGISConverter(ops).write()
            out_path = os.path.join(self.outdir, self.prefix + '_' + MetaOCGISConverter._meta_filename)
            with open(out_path, 'w') as meta_file:
                meta_file.write(lines)

        # Add the dataset descriptor file if requested.
        if self._add_did_file:
            ocgis_lh('writing dataset description (DID) file', 'conv', logging.DEBUG)
            path = os.path.join(self.outdir, self.prefix + '_did.csv')
            _write_dataset_identifier_file_(path, ops)

        # Add source metadata if requested.
        if self._add_source_meta:
            ocgis_lh('writing source metadata file', 'conv', logging.DEBUG)
            path = os.path.join(self.outdir, self.prefix + '_source_metadata.txt')
            _write_source_meta_(path, ops)

    # Return the internal path unless overloaded by subclasses.
    ret = self._get_return_()

    return ret
def test_get_intersects_ordering(self):
    """Test grid ordering/origins do not influence grid subsetting."""
    keywords = {
        KeywordArgument.OPTIMIZED_BBOX_SUBSET: [False, True],
        'should_wrap': [False, True],
        'reverse_x': [False, True],
        'reverse_y': [False, True],
        'should_expand': [False, True],
    }

    x_value = np.array([155., 160., 165., 170., 175., 180., 185., 190., 195., 200., 205.])
    y_value = np.array([-20., -15., -10., -5., 0., 5., 10., 15., 20.])
    bbox = [168., -12., 191., 5.3]

    for k in self.iter_product_keywords(keywords, as_namedtuple=False):
        # Pull out the options that drive the test setup; whatever remains in ``k`` is forwarded to
        # ``get_intersects``.
        reverse_x = k.pop('reverse_x')
        reverse_y = k.pop('reverse_y')
        should_expand = k.pop('should_expand')
        should_wrap = k.pop('should_wrap')

        ompi = OcgDist()
        ompi.create_dimension('dx', len(x_value), dist=True)
        ompi.create_dimension('dy', len(y_value))
        ompi.update_dimension_bounds()

        # Reverse a copy of the coordinates so the module-level arrays are left untouched.
        new_x_value = np.flipud(x_value.copy()) if reverse_x else x_value
        new_y_value = np.flipud(y_value.copy()) if reverse_y else y_value

        # Only rank 0 holds the full coordinate variables before they are scattered.
        x = y = None
        if MPI_RANK == 0:
            x = Variable('x', new_x_value, 'dx')
            y = Variable('y', new_y_value, 'dy')

        x = variable_scatter(x, ompi)
        y = variable_scatter(y, ompi)
        grid = Grid(x, y, crs=Spherical())

        with vm.scoped_by_emptyable('scattered', grid):
            if not vm.is_null:
                if should_expand:
                    expand_grid(grid)

                if should_wrap:
                    grid = deepcopy(grid)
                    grid.wrap()
                    # After wrapping, the bounding box is expressed as two boxes on either side of +/-180.
                    actual_bbox = MultiPolygon([box(-180, -12, -169, 5.3), box(168, -12, 180, 5.3)])
                else:
                    actual_bbox = box(*bbox)

                live_ranks = vm.get_live_ranks_from_object(grid)
                with vm.scoped('grid.get_intersects', live_ranks):
                    if not vm.is_null:
                        sub = grid.get_intersects(actual_bbox, **k)
                        with vm.scoped_by_emptyable('sub grid', sub):
                            if not vm.is_null:
                                if should_wrap:
                                    # Shift negative x-coordinates by 360 in place before comparing extents.
                                    current_x_value = sub.x.get_value()
                                    current_x_value[sub.x.get_value() < 0] += 360

                                self.assertEqual(sub.extent_global, (170.0, -10.0, 190.0, 5.0))

                                # An expanded grid is expected to no longer be vectorized.
                                desired = not should_expand
                                self.assertEqual(grid.is_vectorized, desired)
                                self.assertEqual(sub.is_vectorized, desired)

                                self.assertFalse(grid.has_allocated_point)
                                self.assertFalse(grid.has_allocated_polygon)
def get_distributed_slice(self, slc):
    """
    Slice the dimension in parallel. The sliced dimension object is a shallow copy. The returned dimension may
    be empty.

    NOTE(review): the distributed branch calls ``vm.gather``, ``vm.bcast`` and ``vm.barrier``, so it presumably
    has to be called by every rank of the current VM scope -- confirm against callers.

    :param slc: A :class:`slice`-like object or a fancy slice. If this is a fancy slice, ``slc`` must be
     processor-local. If the fancy slice uses integer indices, the indices must be local. In other words, a
     fancy ``slc`` is not manipulated or redistributed prior to slicing.
    :rtype: :class:`~ocgis.Dimension`
    :raises: :class:`~ocgis.exc.EmptyObjectError`
    """
    raise_if_empty(self)

    # Normalize the incoming slice; anything that is not a ``slice`` instance is treated as a fancy slice.
    slc = get_formatted_slice(slc, 1)[0]
    is_fancy = not isinstance(slc, slice)

    # A full slice needs no work beyond copying the dimension.
    if not is_fancy and slc == slice(None):
        ret = self.copy()
    # Use standard slicing for non-distributed dimensions.
    elif not self.dist:
        ret = self[slc]
    else:
        if is_fancy:
            # Fancy slices are used as given.
            local_slc = slc
        else:
            # Translate the global start/stop into this rank's local bounds; None is handled below as
            # "no overlap".
            local_slc = get_global_to_local_slice((slc.start, slc.stop), self.bounds_local)
            if local_slc is not None:
                local_slc = slice(*local_slc)
        # Slice does not overlap local bounds. The dimension is now empty with size 0.
        if local_slc is None:
            ret = self.copy()
            ret.convert_to_empty()
            dimension_size = 0
        # Slice overlaps so do a slice on the dimension using the local slice.
        else:
            ret = self[local_slc]
            dimension_size = len(ret)
        assert dimension_size >= 0

        # Sum the local sizes on rank 0 to get the new global bounds, then share them with every rank.
        dimension_sizes = vm.gather(dimension_size)
        if vm.rank == 0:
            sum_dimension_size = 0
            for ds in dimension_sizes:
                try:
                    sum_dimension_size += ds
                except TypeError:
                    # Entries that cannot be added to an integer are skipped.
                    pass
            bounds_global = (0, sum_dimension_size)
        else:
            bounds_global = None
        bounds_global = vm.bcast(bounds_global)
        if not ret.is_empty:
            ret.bounds_global = bounds_global

        # Normalize the local bounds on live ranks.
        inner_live_ranks = get_nonempty_ranks(ret, vm)
        with vm.scoped('bounds normalization', inner_live_ranks):
            if not vm.is_null:
                # Rank 0 of the scoped VM seeds the running offset with its own length.
                if vm.rank == 0:
                    adjust = len(ret)
                else:
                    adjust = None
                adjust = vm.bcast(adjust)
                # Visit the ranks in order: each rank after the first shifts its local bounds by the running
                # offset and adds its own length, then the updated offset is broadcast from that rank.
                for current_rank in vm.ranks:
                    if vm.rank == current_rank:
                        if vm.rank != 0:
                            ret.bounds_local = [b + adjust for b in ret.bounds_local]
                            adjust += len(ret)
                    vm.barrier()
                    adjust = vm.bcast(adjust, root=current_rank)
    return ret
def test_get_intersects_parallel(self):
    """
    Test ``Grid.get_intersects`` in parallel.

    Covers three cases: a subset geometry that raises :class:`EmptySubsetError`, the returned slice for each
    combination of bounds and touch handling, and a grid whose coordinates are sourced from a file.

    :raises SkipTest: On Python 3.5.
    """
    if sys.version_info.major == 3 and sys.version_info.minor == 5:
        raise SkipTest('undefined behavior with Python 3.5')

    grid = self.get_gridxy()
    live_ranks = vm.get_live_ranks_from_object(grid)

    # Test with an empty subset.
    subset_geom = box(1000., 1000., 1100., 1100.)
    with vm.scoped('empty subset', live_ranks):
        if not vm.is_null:
            with self.assertRaises(EmptySubsetError):
                grid.get_intersects(subset_geom)

    # Test combinations.
    subset_geom = box(101.5, 40.5, 102.5, 42.)

    keywords = dict(is_vectorized=[True, False],
                    has_bounds=[False, True],
                    use_bounds=[False, True],
                    keep_touches=[True, False])

    for k in self.iter_product_keywords(keywords):
        grid = self.get_gridxy()

        vm_name, _ = vm.create_subcomm_by_emptyable('grid testing', grid, is_current=True)
        # Release the subcommunicator and restore the communicator on every exit path, including a failed
        # assertion, so a failure does not leave the VM pointing at a stale subcommunicator.
        try:
            if vm.is_null:
                continue

            if k.has_bounds:
                grid.set_extrapolated_bounds('xbounds', 'ybounds', 'bounds')
                self.assertTrue(grid.has_bounds)

            # Cannot use bounds with a point grid abstraction.
            if k.use_bounds and grid.abstraction == 'point':
                continue

            grid_sub, slc = grid.get_intersects(subset_geom, keep_touches=k.keep_touches,
                                                use_bounds=k.use_bounds, return_slice=True)

            if k.has_bounds:
                self.assertTrue(grid.has_bounds)

            # Test geometries are filled appropriately after allocation.
            if not grid_sub.is_empty:
                for t in grid_sub.get_abstraction_geometry().get_value().flat:
                    self.assertIsInstance(t, BaseGeometry)

            self.assertIsInstance(grid_sub, Grid)

            # The expected slice depends on whether touching cells are kept and whether bounds take part.
            if k.keep_touches:
                if k.has_bounds and k.use_bounds:
                    desired = (slice(0, 3, None), slice(0, 3, None))
                else:
                    desired = (slice(1, 3, None), slice(1, 2, None))
            else:
                if k.has_bounds and k.use_bounds:
                    desired = (slice(1, 3, None), slice(1, 2, None))
                else:
                    desired = (slice(1, 2, None), slice(1, 2, None))
            if not grid.is_empty:
                self.assertEqual(grid.has_bounds, k.has_bounds)
                self.assertTrue(grid.is_vectorized)
            self.assertEqual(slc, desired)
        finally:
            vm.free_subcomm(name=vm_name)
            vm.set_comm()

    # Test against a file. #########################################################################################
    subset_geom = box(101.5, 40.5, 102.5, 42.)

    # Rank 0 chooses the path and shares it with the other ranks.
    if MPI_RANK == 0:
        path_grid = self.get_temporary_file_path('grid.nc')
    else:
        path_grid = None
    path_grid = MPI_COMM.bcast(path_grid)

    grid_to_write = self.get_gridxy()
    with vm.scoped_by_emptyable('write', grid_to_write):
        if not vm.is_null:
            field = Field(grid=grid_to_write)
            field.write(path_grid, driver=DriverNetcdfCF)
    MPI_COMM.Barrier()

    # Creating the sourced variables and the grid must not load any coordinate values.
    rd = RequestDataset(uri=path_grid)
    x = SourcedVariable(name='x', request_dataset=rd)
    self.assertIsNone(x._value)
    y = SourcedVariable(name='y', request_dataset=rd)
    self.assertIsNone(x._value)
    self.assertIsNone(y._value)

    grid = Grid(x, y)

    for target in [grid._y_name, grid._x_name]:
        self.assertIsNone(grid.parent[target]._value)
    self.assertTrue(grid.is_vectorized)

    with vm.scoped_by_emptyable('intersects', grid):
        if not vm.is_null:
            sub, slc = grid.get_intersects(subset_geom, return_slice=True)

            self.assertEqual(slc, (slice(1, 3, None), slice(1, 2, None)))
            self.assertIsInstance(sub, Grid)

    # The file may be deleted before other ranks open.
    MPI_COMM.Barrier()
def test_write_esmf_weights(self):
    """
    Identity regridding test: chunk a field, generate weights, apply them and rebuild the global file.

    The source and destination fields are equivalent, so the data reconstructed in the master destination file
    must match the data written from the source field.
    """
    # Create source and destination fields. This is the identity test, so the source and destination fields are
    # equivalent.
    src_grid = create_gridxy_global(resolution=3.0, crs=Spherical())

    # Only test masking in serial to make indexing easier...just being lazy
    if vm.size == 1:
        mask = src_grid.get_mask(create=True)
        mask[4, 5] = True
        mask[25, 27] = True
        src_grid.set_mask(mask)
        self.assertEqual(src_grid.get_mask().sum(), 2)

    src_field = create_exact_field(src_grid, 'foo', ntime=3)
    dst_field = deepcopy(src_field)

    # Write the fields to disk for use in global file reconstruction and testing. Rank 0 chooses the paths and
    # shares them so every rank writes to the same files.
    if vm.rank == 0:
        master_path = self.get_temporary_file_path('foo.nc')
        src_field_path = self.get_temporary_file_path('src_field.nc')
    else:
        master_path = None
        src_field_path = None
    master_path = vm.bcast(master_path)
    src_field_path = vm.bcast(src_field_path)

    # Use a unittest assertion rather than a bare ``assert`` so the check survives ``python -O`` and is
    # reported through the test framework.
    self.assertFalse(os.path.exists(master_path))
    dst_field.write(master_path)
    src_field.write(src_field_path)

    # Remove the destination data variable to test its creation and filling
    dst_field.remove_variable('foo')

    # Chunk the fields and generate weights
    paths = {'wd': self.current_dir_output}
    gc = GridChunker(src_field, dst_field, nchunks_dst=(2, 2), genweights=True, paths=paths,
                     esmf_kwargs={'regrid_method': 'BILINEAR'})
    gc.write_chunks()

    # This is the path to the index file describing how to reconstruct the grid file
    index_path = os.path.join(self.current_dir_output, gc.paths['index_file'])

    # Execute the sparse matrix multiplication using weights read from file
    gc.smm(index_path, paths['wd'])

    with vm.scoped('index and reconstruct', [0]):
        if not vm.is_null:
            # Reconstruct the global destination file
            gc.insert_weighted(index_path, self.current_dir_output, master_path)

            # Load the actual values from file (destination)
            actual_field = RequestDataset(master_path).create_field()
            actual = actual_field.data_variables[0].mv()

            # Load the desired data from file (original values in the source field)
            desired = RequestDataset(src_field_path).create_field().data_variables[0].mv()

            if vm.size_global == 1:
                # Masking is only tested in serial
                self.assertEqual(actual_field.grid.get_mask().sum(), 2)
            else:
                self.assertIsNone(actual_field.grid.get_mask())

            self.assertNumpyAll(actual, desired)