def get_periodicity_parameters(grid):
    """
    Get characteristics of a grid's periodicity. This is only applicable for grids with a spherical coordinate system.
    There are two classifications:

    1. A grid is periodic (i.e. it has global coverage). Periodicity is determined only with the x/longitude dimension.
    2. A grid is non-periodic (i.e. it has regional coverage).

    Call is collective across the current VM.

    :param grid: :class:`~ocgis.Grid`
    :return: A dictionary containing periodicity parameters.
    :rtype: dict
    """
    # Check if the grid may be flagged as "periodic" by determining if its extent is global. Use the centroids and the
    # grid resolution to determine this.
    is_periodic = False
    col = grid.x.get_value()
    resolution = grid.resolution_x
    min_col, max_col = col.min(), col.max()
    # Work only with unwrapped coordinates.
    if min_col < 0:
        select = col < 0
        if select.any():
            max_col = np.max(col[col < 0]) + 360.
        select = col >= 0
        if select.any():
            min_col = np.min(col[col >= 0])
    # Check the min and max column values are within a tolerance (the grid resolution) of global (0 to 360) edges.
    if (0. - resolution) <= min_col <= (0. + resolution):
        min_periodic = True
    else:
        min_periodic = False
    if (360. - resolution) <= max_col <= (360. + resolution):
        max_periodic = True
    else:
        max_periodic = False

    # Determine global periodicity.
    min_periodic = vm.gather(min_periodic)
    max_periodic = vm.gather(max_periodic)
    if vm.rank == 0:
        min_periodic = any(min_periodic)
        max_periodic = any(max_periodic)
        if min_periodic and max_periodic:
            is_periodic = True
        else:
            is_periodic = False
    is_periodic = vm.bcast(is_periodic)

    # If the grid is periodic, set the appropriate parameters.
    if is_periodic:
        num_peri_dims = 1
        periodic_dim = 0
        pole_dim = 1
    else:
        num_peri_dims, pole_dim, periodic_dim = [None] * 3

    ret = {'num_peri_dims': num_peri_dims, 'pole_dim': pole_dim, 'periodic_dim': periodic_dim}
    return ret

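# A minimal usage sketch for get_periodicity_parameters (hypothetical `grid`; an
# ocgis Grid with a spherical coordinate system, run under mpirun since the
# gather/bcast calls above are collective):
#
#     params = get_periodicity_parameters(grid)
#     if params['num_peri_dims'] == 1:
#         pass  # periodic: the grid spans 0-360 degrees longitude
#     else:
#         pass  # regional grid: all three parameters are None
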
def test_reduce_reindex_coordinate_variables(self):
    self.add_barrier = False
    dist = OcgDist()
    dist.create_dimension('dim', 12, dist=True)
    dist.update_dimension_bounds()

    global_cindex_arr = np.array([4, 2, 1, 2, 1, 4, 1, 4, 2, 5, 6, 7])

    if vm.rank == 0:
        var_cindex = Variable('cindex', value=global_cindex_arr, dimensions='dim')
    else:
        var_cindex = None
    var_cindex = variable_scatter(var_cindex, dist)

    vm.create_subcomm_by_emptyable('test', var_cindex, is_current=True)
    if vm.is_null:
        return

    raise_if_empty(var_cindex)

    coords = np.array([0, 11, 22, 33, 44, 55, 66, 77, 88, 99, 100, 110, 120, 130, 140, 150])
    coords = Variable(name='coords', value=coords, dimensions='coord_dim')

    new_cindex, u_indices = reduce_reindex_coordinate_variables(var_cindex)

    desired = coords[global_cindex_arr].get_value()

    if len(u_indices) > 0:
        new_coords = coords[u_indices].get_value()
    else:
        new_coords = np.array([])
    gathered_new_coords = vm.gather(new_coords)
    gathered_new_cindex = vm.gather(new_cindex)
    if vm.rank == 0:
        gathered_new_coords = hgather(gathered_new_coords)
        gathered_new_cindex = hgather(gathered_new_cindex)

        actual = gathered_new_coords[gathered_new_cindex]

        self.assertAsSetEqual(gathered_new_cindex.tolist(), [2, 1, 0, 3, 4, 5])
        desired_new_coords = [11, 22, 44, 55, 66, 77]
        self.assertAsSetEqual(gathered_new_coords.tolist(), desired_new_coords)
        self.assertEqual(len(gathered_new_coords), len(desired_new_coords))

        self.assertNumpyAll(actual, desired)

def test_create_unique_global_array(self):
    dist = OcgDist()
    dist.create_dimension('dim', 9, dist=True)
    dist.update_dimension_bounds()

    values = [
        [4, 2, 1, 2, 1, 4, 1, 4, 2],
        [44, 25, 16, 27, 18, 49, 10, 41, 22],
        [44, 25, 16, 27, 44, 49, 10, 41, 44],
        [1, 1, 1, 1, 1, 1, 1, 1, 1]
    ]

    for v in values:
        if vm.rank == 0:
            index = Variable(name='cindex', value=v, dimensions='dim')
            desired = np.unique(index.get_value())
            desired_length = len(desired)
        else:
            index = None
        index = variable_scatter(index, dist)

        with vm.scoped_by_emptyable('not empty', index):
            if not vm.is_null:
                uvar = create_unique_global_array(index.get_value())
                uvar_gathered = vm.gather(uvar)

                if vm.rank == 0:
                    uvar_gathered = hgather(uvar_gathered)
                    self.assertEqual(len(uvar_gathered), desired_length)
                    self.assertEqual(set(uvar_gathered), set(desired))

def _gc_iter_dst_grid_slices_(grid_chunker):
    # TODO: This method uses some global gathers which is not ideal.
    # Destination splitting works off center coordinates only.
    pgc = grid_chunker.dst_grid.abstractions_available['point']

    # Use the unique center values to break the grid into pieces. This ensures that nearby grid cells are close
    # spatially. If we just break the grid into pieces without using unique values, the points may be scattered, which
    # does not optimize the spatial coverage of the source grid.
    center_lat = pgc.y.get_value()

    # ucenter_lat = np.unique(center_lat)
    ucenter_lat = create_unique_global_array(center_lat)

    ucenter_lat = vm.gather(ucenter_lat)
    if vm.rank == 0:
        ucenter_lat = hgather(ucenter_lat)
        ucenter_lat.sort()
        ucenter_splits = np.array_split(ucenter_lat, grid_chunker.nchunks_dst[0])
    else:
        ucenter_splits = [None] * grid_chunker.nchunks_dst[0]

    for ucenter_split in ucenter_splits:
        ucenter_split = vm.bcast(ucenter_split)
        select = np.zeros_like(center_lat, dtype=bool)
        for v in ucenter_split.flat:
            select = np.logical_or(select, center_lat == v)
        yield select

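# For intuition, a serial numpy-only sketch of the chunking idea above: sort the
# unique center latitudes, split them into roughly equal bins, and select the grid
# elements whose center falls in each bin. Values here are illustrative only.
import numpy as np

center_lat = np.array([10., 20., 10., 30., 20., 40.])
ucenter_lat = np.unique(center_lat)               # sorted unique latitudes
for ucenter_split in np.array_split(ucenter_lat, 2):
    select = np.isin(center_lat, ucenter_split)   # boolean selection per chunk
    print(ucenter_split, select)
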
def _get_field_write_target_(cls, field):
    """Collective!"""
    if field.crs is not None:
        field.crs.format_spatial_object(field)

    grid = field.grid
    if grid is not None:
        # If any grid pieces are masked, ensure the mask is created across all grids.
        has_mask = vm.gather(grid.has_mask)
        if vm.rank == 0:
            if any(has_mask):
                create_mask = True
            else:
                create_mask = False
        else:
            create_mask = None
        create_mask = vm.bcast(create_mask)
        if create_mask and not grid.has_mask:
            grid.get_mask(create=True)

        # Putting units on bounds for netCDF-CF can confuse some parsers.
        if grid.has_bounds:
            field = field.copy()
            field.x.bounds.attrs.pop('units', None)
            field.y.bounds.attrs.pop('units', None)

    # Remove the current coordinate system if this is a dummy coordinate system.
    if env.COORDSYS_ACTUAL is not None:
        field = field.copy()
        field.set_crs(env.COORDSYS_ACTUAL, should_add=True)

    return field

def _get_field_write_target_(cls, field):
    """Collective!"""
    ocgis_lh(level=10, logger="driver.nc", msg="entering _get_field_write_target_")

    if field.crs is not None:
        field.crs.format_spatial_object(field)

    grid = field.grid
    if grid is not None:
        # If any grid pieces are masked, ensure the mask is created across all grids.
        has_mask = vm.gather(grid.has_mask)
        if vm.rank == 0:
            if any(has_mask):
                create_mask = True
            else:
                create_mask = False
        else:
            create_mask = None
        create_mask = vm.bcast(create_mask)
        if create_mask and not grid.has_mask:
            grid.get_mask(create=True)

        # Putting units on bounds for netCDF-CF can confuse some parsers.
        if grid.has_bounds:
            field = field.copy()
            field.x.bounds.attrs.pop('units', None)
            field.y.bounds.attrs.pop('units', None)

    # Remove the current coordinate system if this is a dummy coordinate system.
    if env.COORDSYS_ACTUAL is not None:
        field = field.copy()
        field.set_crs(env.COORDSYS_ACTUAL, should_add=True)

    return field

def _get_field_write_target_(cls, field):
    """Collective!"""
    # These changes to the field can be maintained following a write.
    if field.crs is not None:
        field.crs.format_field(field)

    # Putting units on bounds for netCDF-CF can confuse some parsers.
    grid = field.grid
    if grid is not None:
        # If any grid pieces are masked, ensure the mask is created across all grids.
        has_mask = vm.gather(grid.has_mask)
        if vm.rank == 0:
            if any(has_mask):
                create_mask = True
            else:
                create_mask = False
        else:
            create_mask = None
        create_mask = vm.bcast(create_mask)
        if create_mask and not grid.has_mask:
            grid.get_mask(create=True)

        if grid.has_bounds:
            field = field.copy()
            field.x.bounds.attrs.pop('units', None)
            field.y.bounds.attrs.pop('units', None)

    return field

def get_wrapped_state(self, target):
    """
    Return the wrapped state of a field. This function only checks grid centroids and geometry exteriors.
    Bounds/corners on the grid are excluded.

    :param target: The object to evaluate for wrapped state.
    :type target: :class:`~ocgis.Field`
    """
    # TODO: Wrapped state should operate on the x-coordinate variable vectors or geometries only.
    # TODO: This should be a method on grids and geometry variables.
    from ocgis.collection.field import Field
    from ocgis.spatial.base import AbstractXYZSpatialContainer
    from ocgis import vm

    raise_if_empty(self)

    # If this is not a wrappable coordinate system, wrapped state is undefined.
    if not self.is_wrappable:
        ret = None
    else:
        if isinstance(target, Field):
            grid = target.grid
            if grid is not None:
                target = grid
            else:
                target = target.geom

        if target is None:
            raise WrappedStateEvalTargetMissing
        elif target.is_empty:
            ret = None
        elif isinstance(target, AbstractXYZSpatialContainer):
            ret = self._get_wrapped_state_from_array_(target.x.get_value())
        else:
            stops = (WrappedState.WRAPPED, WrappedState.UNWRAPPED)
            ret = WrappedState.UNKNOWN
            geoms = target.get_masked_value().flat
            _is_masked = np.ma.is_masked
            _get_ws = self._get_wrapped_state_from_geometry_
            for geom in geoms:
                if not _is_masked(geom):
                    flag = _get_ws(geom)
                    if flag in stops:
                        ret = flag
                        break

    rets = vm.gather(ret)
    if vm.rank == 0:
        rets = set(rets)
        if WrappedState.WRAPPED in rets:
            ret = WrappedState.WRAPPED
        elif WrappedState.UNWRAPPED in rets:
            ret = WrappedState.UNWRAPPED
        else:
            ret = list(rets)[0]
    else:
        ret = None
    ret = vm.bcast(ret)

    return ret

def test_reduce_reindex_coordinate_index(self):
    dist = OcgDist()
    dist.create_dimension('dim', 12, dist=True)
    dist.update_dimension_bounds()

    global_cindex_arr = np.array([4, 2, 1, 2, 1, 4, 1, 4, 2, 5, 6, 7])

    if vm.rank == 0:
        var_cindex = Variable('cindex', value=global_cindex_arr, dimensions='dim')
    else:
        var_cindex = None
    var_cindex = variable_scatter(var_cindex, dist)

    vm.create_subcomm_by_emptyable('test', var_cindex, is_current=True)
    if vm.is_null:
        return

    raise_if_empty(var_cindex)

    coords = np.array([0, 11, 22, 33, 44, 55, 66, 77, 88, 99, 100, 110, 120, 130, 140, 150])
    coords = Variable(name='coords', value=coords, dimensions='coord_dim')

    new_cindex, u_indices = reduce_reindex_coordinate_index(var_cindex)

    desired = coords[global_cindex_arr].get_value()

    if len(u_indices) > 0:
        new_coords = coords[u_indices].get_value()
    else:
        new_coords = np.array([])
    gathered_new_coords = vm.gather(new_coords)
    gathered_new_cindex = vm.gather(new_cindex)
    if vm.rank == 0:
        gathered_new_coords = hgather(gathered_new_coords)
        gathered_new_cindex = hgather(gathered_new_cindex)

        actual = gathered_new_coords[gathered_new_cindex]

        self.assertAsSetEqual(gathered_new_cindex.tolist(), [2, 1, 0, 3, 4, 5])
        desired_new_coords = [11, 22, 44, 55, 66, 77]
        self.assertAsSetEqual(gathered_new_coords.tolist(), desired_new_coords)
        self.assertEqual(len(gathered_new_coords), len(desired_new_coords))

        self.assertNumpyAll(actual, desired)

def test_arange_from_dimension(self):
    dist = OcgDist()
    dim = dist.create_dimension('dim', size=7, dist=True)
    dist.update_dimension_bounds()

    actual = arange_from_dimension(dim, start=2, dtype=np.int64)
    actual = vm.gather(actual)
    if vm.rank == 0:
        actual = hgather(actual)
        desired = np.arange(2, 9, dtype=np.int64)
        self.assertNumpyAll(actual, desired)

def has_mask_global(self):
    """
    Returns ``True`` if the global spatial object has a mask. Collective across the current VM.

    :rtype: bool
    """
    raise_if_empty(self)
    has_masks = vm.gather(self.has_mask)
    if vm.rank == 0:
        has_mask = np.any(has_masks)
    else:
        has_mask = None
    has_mask = vm.bcast(has_mask)
    return has_mask

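# has_mask_global above follows the gather -> reduce-on-root -> broadcast idiom used
# throughout these snippets. A self-contained sketch of the same pattern with raw
# mpi4py (no ocgis; run with e.g. `mpirun -n 4 python demo.py`):
from mpi4py import MPI


def any_rank_flagged(local_flag, comm=MPI.COMM_WORLD):
    """Return True on every rank if any rank's local flag is True."""
    flags = comm.gather(local_flag, root=0)  # list on root, None elsewhere
    result = any(flags) if comm.Get_rank() == 0 else None
    return comm.bcast(result, root=0)        # all ranks receive the reduction
    # (An equivalent one-liner is comm.allreduce(local_flag, op=MPI.LOR).)


if __name__ == '__main__':
    rank = MPI.COMM_WORLD.Get_rank()
    print(rank, any_rank_flagged(rank == 2))
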
def has_masked_values_global(self):
    """
    Returns ``True`` if the global spatial object's mask contains any masked values. Will return ``False`` if the
    global object has no mask. Collective across the current VM.

    :rtype: bool
    """
    raise_if_empty(self)
    has_masks = vm.gather(self.has_masked_values)
    if vm.rank == 0:
        ret = np.any(has_masks)
    else:
        ret = None
    ret = vm.bcast(ret)
    return ret

def geom_type_global(self):
    """
    :returns: global geometry type collective across the current :class:`~ocgis.OcgVM`
    :rtype: str
    :raises: :class:`~ocgis.exc.EmptyObjectError`
    """
    raise_if_empty(self)
    geom_types = vm.gather(self.geom_type)
    if vm.rank == 0:
        # Prefer a multi-part type if any rank reports one.
        for g in geom_types:
            if g.startswith('Multi'):
                break
    else:
        g = None
    return vm.bcast(g)

def test_system_raise_exception_subcommunicator(self):
    if vm.size != 4:
        raise SkipTest('vm.size != 4')

    raiser = Mock(side_effect=IndexError('oops'))

    with self.assertRaises(IndexError):
        e = None
        with vm.scoped('the sub which will raise', [2]):
            if not vm.is_null:
                try:
                    raiser()
                except IndexError as exc:
                    e = exc
        es = vm.gather(e)
        es = vm.bcast(es)
        for e in es:
            if e is not None:
                raise e

def shape_global(self):
    """
    Get the global shape across the current :class:`~ocgis.OcgVM`.

    :rtype: :class:`tuple` of :class:`int`
    :raises: :class:`~ocgis.exc.EmptyObjectError`
    """
    raise_if_empty(self)

    maxd = [max(d.bounds_global) for d in self.dimensions]
    shapes = vm.gather(maxd)
    if vm.rank == 0:
        shape_global = tuple(np.max(shapes, axis=0))
    else:
        shape_global = None
    shape_global = vm.bcast(shape_global)

    return shape_global

def get_extent_global(container):
    raise_if_empty(container)

    extent = container.extent
    extents = vm.gather(extent)

    if vm.rank == 0:
        extents = [e for e in extents if e is not None]
        extents = np.array(extents)
        ret = [None] * 4
        ret[0] = np.min(extents[:, 0])
        ret[1] = np.min(extents[:, 1])
        ret[2] = np.max(extents[:, 2])
        ret[3] = np.max(extents[:, 3])
        ret = tuple(ret)
    else:
        ret = None
    ret = vm.bcast(ret)

    return ret

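# For intuition, the root-rank reduction in get_extent_global collapses per-rank
# (min_x, min_y, max_x, max_y) tuples like so (plain numpy, no MPI; values are
# illustrative):
import numpy as np

extents = np.array([(0., -10., 90., 10.), (90., -20., 180., 5.)])  # two ranks
global_extent = (extents[:, 0].min(), extents[:, 1].min(),
                 extents[:, 2].max(), extents[:, 3].max())
print(global_extent)  # (0.0, -20.0, 180.0, 10.0)
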
def test_gather(self):
    if MPI_SIZE != 8:
        raise SkipTest('MPI_SIZE != 8')

    vm = OcgVM()
    live_ranks = [1, 3, 7]
    # vm.set_live_ranks(live_ranks)
    vm.create_subcomm('tester', live_ranks, is_current=True)

    if MPI_RANK in live_ranks:
        value = MPI_RANK
        gathered_value = vm.gather(value)

    if MPI_RANK == 1:
        self.assertEqual(gathered_value, [1, 3, 7])
    elif MPI_RANK in live_ranks:
        self.assertIsNone(gathered_value)

    vm.finalize()

def create_ugid_global(self, name, start=1):
    """
    Same as :meth:`~ocgis.GeometryVariable.create_ugid` but collective across the current :class:`~ocgis.OcgVM`.

    :raises: :class:`~ocgis.exc.EmptyObjectError`
    """
    raise_if_empty(self)

    sizes = vm.gather(self.size)
    if vm.rank == 0:
        for idx, n in enumerate(vm.ranks):
            if n == vm.rank:
                rank_start = start
            else:
                vm.comm.send(start, dest=n)
            start += sizes[idx]
    else:
        rank_start = vm.comm.recv(source=0)

    return self.create_ugid(name, start=rank_start)

def raise_if_empty(target, check_current=False):
    if check_current:
        from ocgis import vm
        gathered = vm.gather(target.is_empty)
        if vm.rank == 0:
            if any(gathered):
                msg = 'No empty {} objects allowed across the current VM.'.format(target.__class__)
                exc = EmptyObjectError(msg)
                try:
                    raise exc
                finally:
                    from ocgis import vm
                    vm.abort(exc=exc)
    elif target.is_empty:
        msg = 'No empty {} objects allowed.'.format(target.__class__)
        exc = EmptyObjectError(msg)
        try:
            raise exc
        finally:
            from ocgis import vm
            vm.abort(exc=exc)

def create_unique_global_array(arr):
    """
    Create a distributed NumPy array containing unique elements. If the rank has no unique items, an array with zero
    elements will be returned. This call is collective across the current VM.

    :param arr: Input array for unique operation.
    :type arr: :class:`numpy.ndarray`
    :rtype: :class:`numpy.ndarray`
    :raises: ValueError
    """
    from ocgis import vm

    if arr is None:
        raise ValueError('Input must be a NumPy array.')

    unique_local = np.unique(arr)
    vm.barrier()

    local_bounds = min(unique_local), max(unique_local)
    lb_global = vm.gather(local_bounds)
    lb_global = vm.bcast(lb_global)

    # Find the vm ranks the local rank cares about. It cares if unique values have overlapping unique bounds.
    overlaps = []
    for rank, lb in enumerate(lb_global):
        if rank == vm.rank:
            continue
        contains = []
        for lb2 in local_bounds:
            if lb[0] <= lb2 <= lb[1]:
                to_app = True
            else:
                to_app = False
            contains.append(to_app)
        if any(contains) or (local_bounds[0] <= lb[0] and local_bounds[1] >= lb[1]):
            overlaps.append(rank)

    # Send out the overlapping sources.
    tag_overlap = MPITag.OVERLAP_CHECK
    tag_select_send_size = MPITag.SELECT_SEND_SIZE
    vm.barrier()

    # NumPy and MPI types.
    np_type = unique_local.dtype
    mpi_type = vm.get_mpi_type(np_type)

    for o in overlaps:
        if vm.rank != o and vm.rank < o:
            dest_rank_bounds = lb_global[o]
            select_send = np.logical_and(unique_local >= dest_rank_bounds[0],
                                         unique_local <= dest_rank_bounds[1])
            u_src = unique_local[select_send]
            select_send_size = u_src.size
            _ = vm.comm.Isend([np.array([select_send_size], dtype=np_type), mpi_type], dest=o,
                              tag=tag_select_send_size)
            _ = vm.comm.Isend([u_src, mpi_type], dest=o, tag=tag_overlap)

    # Receive and process conflicts to reduce the unique local values.
    if vm.rank != 0:
        for o in overlaps:
            if vm.rank != o and vm.rank > o:
                select_send_size = np.array([0], dtype=np_type)
                req_select_send_size = vm.comm.Irecv([select_send_size, mpi_type], source=o,
                                                     tag=tag_select_send_size)
                req_select_send_size.wait()
                select_send_size = select_send_size[0]

                u_src = np.zeros(select_send_size.astype(int), dtype=np_type)
                req = vm.comm.Irecv([u_src, mpi_type], source=o, tag=tag_overlap)
                req.wait()

                utokeep = np.ones_like(unique_local, dtype=bool)
                for uidx, u in enumerate(unique_local.flat):
                    if u in u_src:
                        utokeep[uidx] = False
                unique_local = unique_local[utokeep]

    vm.barrier()
    return unique_local

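# A usage sketch for create_unique_global_array (hypothetical values; run under
# mpirun with an active ocgis VM). Each rank passes its local portion of a
# distributed array and gets back only the unique values it ends up owning, so no
# value is reported by two ranks:
#
#     unique_local = create_unique_global_array(np.array([4, 2, 1, 2]))
#     gathered = vm.gather(unique_local)
#     if vm.rank == 0:
#         print(hgather(gathered))  # global unique values, once each
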
def test_system_converting_state_boundaries_shapefile(self):
    ocgis.env.USE_NETCDF4_MPI = False  # tdk:FIX: this hangs in the STATE_FIPS write for asynch might be nc4 bug...
    keywords = {'transform_to_crs': [None, Spherical],
                'use_geometry_iterator': [False, True]}
    actual_xsums = []
    actual_ysums = []
    for k in self.iter_product_keywords(keywords):
        if k.use_geometry_iterator and k.transform_to_crs is not None:
            to_crs = k.transform_to_crs()
        else:
            to_crs = None
        if k.transform_to_crs is None:
            desired_crs = WGS84()
        else:
            desired_crs = k.transform_to_crs()

        rd = RequestDataset(uri=self.path_state_boundaries, variable=['UGID', 'ID'])
        rd.metadata['schema']['geometry'] = 'MultiPolygon'
        field = rd.get()
        self.assertEqual(len(field.data_variables), 2)

        # Test there is no mask present.
        field.geom.load()
        self.assertFalse(field.geom.has_mask)
        self.assertNotIn(VariableName.SPATIAL_MASK, field)
        self.assertIsNone(field.dimension_map.get_spatial_mask())

        self.assertEqual(field.crs, WGS84())
        if k.transform_to_crs is not None:
            field.update_crs(desired_crs)
        self.assertEqual(len(field.data_variables), 2)
        self.assertEqual(len(field.geom.parent.data_variables), 2)

        try:
            gc = field.geom.convert_to(pack=False, use_geometry_iterator=k.use_geometry_iterator, to_crs=to_crs)
        except ValueError as e:
            try:
                self.assertFalse(k.use_geometry_iterator)
                self.assertIsNotNone(to_crs)
            except AssertionError:
                raise e
            else:
                continue

        actual_xsums.append(gc.x.get_value().sum())
        actual_ysums.append(gc.y.get_value().sum())
        self.assertEqual(gc.crs, desired_crs)

        # Test there is no mask present after conversion to geometry coordinates.
        self.assertFalse(gc.has_mask)
        self.assertNotIn(VariableName.SPATIAL_MASK, gc.parent)
        self.assertIsNone(gc.dimension_map.get_spatial_mask())

        path = self.get_temporary_file_path('esmf_state_boundaries.nc')
        self.assertEqual(gc.parent.crs, desired_crs)
        gc.parent.write(path, driver=DriverKey.NETCDF_ESMF_UNSTRUCT)

        gathered_geoms = vm.gather(field.geom.get_value())
        if vm.rank == 0:
            actual_geoms = []
            for g in gathered_geoms:
                actual_geoms.extend(g)

            rd = RequestDataset(path, driver=DriverKey.NETCDF_ESMF_UNSTRUCT)
            infield = rd.get()
            self.assertEqual(create_crs(infield.crs.value), desired_crs)
            for dv in field.data_variables:
                self.assertIn(dv.name, infield)
            ingrid = infield.grid
            self.assertIsInstance(ingrid, GridUnstruct)

            for g in ingrid.archetype.iter_geometries():
                self.assertPolygonSimilar(g[1], actual_geoms[g[0]], check_type=False)

        vm.barrier()

        # Test coordinates have actually changed.
        if not k.use_geometry_iterator:
            for ctr, to_test in enumerate([actual_xsums, actual_ysums]):
                for lhs, rhs in itertools.combinations(to_test, 2):
                    if ctr == 0:
                        self.assertAlmostEqual(lhs, rhs)
                    else:
                        self.assertNotAlmostEqual(lhs, rhs)

live_ranks = vm.get_live_ranks_from_object(grid_sub)
bbox = vm.bcast(extent_global, root=live_ranks[0])

vm.barrier()
if vm.rank == 0:
    print('starting bbox subset:', bbox)
vm.barrier()

has_subset = get_subset(bbox, subset_filename, 1)

vm.barrier()
if vm.rank == 0:
    print('finished bbox subset:', bbox)
vm.barrier()

has_subset = vm.gather(has_subset)
if vm.rank == 0:
    if any(has_subset):
        has_subset = True
        ctr += 1
    else:
        has_subset = False
ctr = vm.bcast(ctr)
has_subset = vm.bcast(has_subset)

if has_subset:
    with vm.scoped_by_emptyable('dst subset write', grid_sub):
        if not vm.is_null:
            grid_sub.parent.write(dst_subset_filename)

def test_get_intersects(self):
    subset_geom = self.fixture_subset_geom()
    poly = self.fixture()

    # Scatter the polygon geometry coordinates for the parallel case ===============================================

    dist = OcgDist()
    for d in poly.parent.dimensions.values():
        d = d.copy()
        if d.name == poly.dimensions[0].name:
            d.dist = True
        dist.add_dimension(d)
    dist.update_dimension_bounds()

    poly.parent = variable_collection_scatter(poly.parent, dist)

    vm.create_subcomm_by_emptyable('scatter', poly, is_current=True)
    if vm.is_null:
        return

    poly.parent._validate_()

    for v in poly.parent.values():
        self.assertEqual(id(v.parent), id(poly.parent))
        self.assertEqual(len(v.parent), len(poly.parent))

    # ==============================================================================================================

    # p = os.path.join('/tmp/subset_geom.shp')
    # s = GeometryVariable.from_shapely(subset_geom)
    # s.write_vector(p)
    # p = os.path.join('/tmp/poly.shp')
    # s = poly.convert_to()
    # s.write_vector(p)

    sub = poly.get_intersects(subset_geom)
    vm.create_subcomm_by_emptyable('after intersects', sub, is_current=True)
    if vm.is_null:
        return

    actual = []
    for g in sub.iter_geometries():
        if g[1] is not None:
            actual.append([g[1].centroid.x, g[1].centroid.y])

    desired = [[20.0, -49.5], [10.0, -44.5], [10.0, -39.5]]
    actual = vm.gather(actual)
    if vm.rank == 0:
        gactual = []
        for a in actual:
            for ia in a:
                gactual.append(ia)
        self.assertEqual(gactual, desired)

    self.assertEqual(len(sub.parent), len(poly.parent))

    sub.parent._validate_()
    sub2 = sub.reduce_global()
    sub2.parent._validate_()

    # p = os.path.join('/tmp/sub.shp')
    # s = sub.convert_to()
    # s.write_vector(p)
    # p = os.path.join('/tmp/sub2.shp')
    # s = sub2.convert_to()
    # s.write_vector(p)

    # Gather then broadcast coordinates so all coordinates are available on each process.
    to_add = []
    for gather_target in [sub2.x, sub2.y]:
        gathered = variable_gather(gather_target.extract())
        gathered = vm.bcast(gathered)
        to_add.append(gathered)
    for t in to_add:
        sub2.parent.add_variable(t, force=True)

    for ctr, to_check in enumerate([sub, sub2]):
        actual = []
        for g in to_check.iter_geometries():
            if g[1] is not None:
                actual.append([g[1].centroid.x, g[1].centroid.y])

        desired = [[20.0, -49.5], [10.0, -44.5], [10.0, -39.5]]
        actual = vm.gather(actual)
        if vm.rank == 0:
            gactual = []
            for a in actual:
                for ia in a:
                    gactual.append(ia)
            self.assertEqual(gactual, desired)

def get_distributed_slice(self, slc):
    """
    Slice the dimension in parallel. The sliced dimension object is a shallow copy. The returned dimension may be
    empty.

    :param slc: A :class:`slice`-like object or a fancy slice. If this is a fancy slice, ``slc`` must be
     processor-local. If the fancy slice uses integer indices, the indices must be local. In other words, a fancy
     ``slc`` is not manipulated or redistributed prior to slicing.
    :rtype: :class:`~ocgis.Dimension`
    :raises: :class:`~ocgis.exc.EmptyObjectError`
    """
    raise_if_empty(self)

    slc = get_formatted_slice(slc, 1)[0]
    is_fancy = not isinstance(slc, slice)

    if not is_fancy and slc == slice(None):
        ret = self.copy()
    # Use standard slicing for non-distributed dimensions.
    elif not self.dist:
        ret = self[slc]
    else:
        if is_fancy:
            local_slc = slc
        else:
            local_slc = get_global_to_local_slice((slc.start, slc.stop), self.bounds_local)
            if local_slc is not None:
                local_slc = slice(*local_slc)
        # Slice does not overlap local bounds. The dimension is now empty with size 0.
        if local_slc is None:
            ret = self.copy()
            ret.convert_to_empty()
            dimension_size = 0
        # Slice overlaps so do a slice on the dimension using the local slice.
        else:
            ret = self[local_slc]
            dimension_size = len(ret)
        assert dimension_size >= 0
        dimension_sizes = vm.gather(dimension_size)
        if vm.rank == 0:
            sum_dimension_size = 0
            for ds in dimension_sizes:
                try:
                    sum_dimension_size += ds
                except TypeError:
                    pass
            bounds_global = (0, sum_dimension_size)
        else:
            bounds_global = None
        bounds_global = vm.bcast(bounds_global)
        if not ret.is_empty:
            ret.bounds_global = bounds_global

        # Normalize the local bounds on live ranks.
        inner_live_ranks = get_nonempty_ranks(ret, vm)
        with vm.scoped('bounds normalization', inner_live_ranks):
            if not vm.is_null:
                if vm.rank == 0:
                    adjust = len(ret)
                else:
                    adjust = None
                adjust = vm.bcast(adjust)
                for current_rank in vm.ranks:
                    if vm.rank == current_rank:
                        if vm.rank != 0:
                            ret.bounds_local = [b + adjust for b in ret.bounds_local]
                            adjust += len(ret)
                    vm.barrier()
                    adjust = vm.bcast(adjust, root=current_rank)
    return ret

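# A usage sketch for get_distributed_slice (hypothetical setup mirroring the tests
# in this collection): every rank passes the same global slice and receives its
# possibly empty local piece, with global bounds recomputed collectively.
#
#     dist = OcgDist()
#     dim = dist.create_dimension('dim', 10, dist=True)
#     dist.update_dimension_bounds()
#     sub = dim.get_distributed_slice(slice(3, 8))  # collective call
#     print(vm.rank, sub.is_empty, sub.bounds_local, sub.bounds_global)
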
def redistribute_by_src_idx(variable, dimname, dimension):
    """
    Redistribute values in ``variable`` using the source index associated with ``dimension``. This reloads the data
    from source and does not do an in-memory redistribution using MPI.

    This function is collective across the current :class:`~ocgis.OcgVM`.

    * Uses fancy indexing only.
    * Gathers all source indices to a single processor.

    :param variable: The variable to redistribute.
    :type variable: :class:`~ocgis.Variable`
    :param str dimname: The name of the dimension holding the source indices.
    :param dimension: The dimension object.
    :type dimension: :class:`~ocgis.Dimension`
    """
    from ocgis import SourcedVariable, Variable, vm
    from ocgis.variable.dimension import create_src_idx

    assert isinstance(variable, SourcedVariable)
    assert dimname is not None

    # If this is a serial operation just return. The rank should be fully autonomous in terms of its source
    # information.
    if vm.size == 1:
        return

    # There needs to be at least one rank to redistribute.
    live_ranks = vm.get_live_ranks_from_object(variable)
    if len(live_ranks) == 0:
        raise ValueError('There must be at least one rank to redistribute by source index.')

    # Remove relevant values from a variable.
    def _reset_variable_(target):
        target._is_empty = None
        target._mask = None
        target._value = None
        target._has_initialized_value = False

    # Gather the sliced dimensions. This dimension holds the source indices that are redistributed.
    dims_global = vm.gather(dimension)

    if vm.rank == 0:
        # Filter any none-type dimensions to handle currently empty ranks.
        dims_global = [d for d in dims_global if d is not None]
        # Convert any bounds-type source indices to fancy type.
        # TODO: Support bounds-type source indices.
        for d in dims_global:
            if d._src_idx_type == SourceIndexType.BOUNDS:
                d._src_idx = create_src_idx(*d._src_idx, si_type=SourceIndexType.FANCY)
        # Create variable to scatter that holds the new global source indices.
        global_src_idx = hgather([d._src_idx for d in dims_global])
        global_src_idx = Variable(name='global_src_idx', value=global_src_idx, dimensions=dimname)
        # The new size is also needed to create a regular distribution for the variable scatter.
        global_src_idx_size = global_src_idx.size
    else:
        global_src_idx, global_src_idx_size = [None] * 2

    # Build the new distribution based on the gathered source indices.
    global_src_idx_size = vm.bcast(global_src_idx_size)
    dest_dist = OcgDist()
    new_dim = dest_dist.create_dimension(dimname, global_src_idx_size, dist=True)
    dest_dist.update_dimension_bounds()

    # This variable holds the new source indices.
    new_rank_src_idx = variable_scatter(global_src_idx, dest_dist)

    if new_rank_src_idx.is_empty:
        # Support new empty ranks following the scatter.
        variable.convert_to_empty()
    else:
        # Reset the variable so everything can be loaded from source.
        _reset_variable_(variable)
        # Update the source index on the target dimension.
        new_dim._src_idx = new_rank_src_idx.get_value()
        # Add the dimension with the new source index to the collection.
        variable.parent.dimensions[dimname] = new_dim

    # All emptiness should be pushed back to the dimensions.
    variable.parent._is_empty = None
    for var in variable.parent.values():
        var._is_empty = None

    # Any variables that have a shared dimension should also be reset.
    for var in variable.parent.values():
        if dimname in var.dimension_names:
            if new_rank_src_idx.is_empty:
                var.convert_to_empty()
            else:
                _reset_variable_(var)

def reduce_reindex_coordinate_index(cindex, start_index=0):
    """
    Reindex a subset of global coordinate indices contained in the ``cindex`` variable.

    The starting index value (``0`` or ``1``) is set by ``start_index`` for the re-indexing procedure.

    Function will not respect masks.

    The function returns a two-element tuple:

     * First element --> A :class:`numpy.ndarray` with the same dimension as ``cindex`` containing the new indexing.
     * Second element --> A :class:`numpy.ndarray` containing the unique indices that may be used to reduce an
       external coordinate storage variable or array.

    :param cindex: A variable containing coordinate index integer values. This variable may be distributed. This may
     also be a NumPy array.
    :type cindex: :class:`~ocgis.Variable` | :class:`~numpy.ndarray`
    :param int start_index: The first index to use for the re-indexing of ``cindex``. This may be ``0`` or ``1``.
    :rtype: tuple
    """
    # Get the coordinate index values as a NumPy array.
    try:
        cindex = cindex.get_value()
    except AttributeError:
        # Assume this is already a NumPy array.
        pass

    # Only work with 1D arrays.
    cindex = np.atleast_1d(cindex)
    # Used to return the coordinate index to the original shape of the incoming coordinate index.
    original_shape = cindex.shape
    cindex = cindex.flatten()

    # Create the unique coordinate index array.
    # barrier_print('before create_unique_global_array')
    u = np.array(create_unique_global_array(cindex))
    # barrier_print('after create_unique_global_array')

    # Synchronize the data type for the new coordinate index.
    lrank = vm.rank
    if lrank == 0:
        dtype = u.dtype
    else:
        dtype = None
    dtype = vm.bcast(dtype)

    # Flag to indicate if the current rank has any unique values.
    has_u = len(u) > 0

    # Create the new coordinate index.
    new_u_dimension = create_distributed_dimension(len(u), name='__new_u_dimension__')
    new_u = arange_from_dimension(new_u_dimension, start=start_index, dtype=dtype)

    # Create a hash for the new index. This is used to remap the old coordinate index.
    if has_u:
        uidx = {ii: jj for ii, jj in zip(u, new_u)}
    else:
        uidx = None

    vm.barrier()

    # Construct local bounds for the rank's unique values. This is used as a cheap index when ranks are looking for
    # index overlaps.
    if has_u:
        local_bounds = min(u), max(u)
    else:
        local_bounds = None
    # Put a copy of the bounds indexing on each rank.
    lb_global = vm.gather(local_bounds)
    lb_global = vm.bcast(lb_global)

    # Find the vm ranks the local rank cares about. It cares if unique values have overlapping unique bounds.
    overlaps = []
    for rank, lb in enumerate(lb_global):
        if rank == lrank:
            continue
        if lb is not None:
            contains = lb[0] <= cindex
            contains = np.logical_and(lb[1] >= cindex, contains)
            if np.any(contains):
                overlaps.append(rank)

    # Ranks must be able to identify which ranks will be asking them for data.
    global_overlaps = vm.gather(overlaps)
    global_overlaps = vm.bcast(global_overlaps)
    destinations = [ii for ii, jj in enumerate(global_overlaps) if vm.rank in jj]

    # MPI communication tags used in the algorithm.
    tag_search = MPITag.REDUCE_REINDEX_SEARCH
    tag_success = MPITag.REDUCE_REINDEX_SUCCESS
    tag_child_finished = MPITag.REDUCE_REINDEX_CHILD_FINISHED
    tag_found = MPITag.REDUCE_REINDEX_FOUND

    # Fill array for the new coordinate index.
    new_cindex = np.empty_like(cindex)

    # vm.barrier_print('starting run_rr')
    # Fill the new coordinate indexing.
    if lrank == 0:
        run_rr_root(new_cindex, cindex, uidx, destinations, tag_child_finished, tag_found, tag_search, tag_success)
    else:
        run_rr_nonroot(new_cindex, cindex, uidx, destinations, has_u, overlaps, tag_child_finished, tag_found,
                       tag_search, tag_success)
    # vm.barrier_print('finished run_rr')

    # Return array to its original shape.
    new_cindex = new_cindex.reshape(*original_shape)

    vm.barrier()

    return new_cindex, u

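# For intuition, a serial equivalent of reduce_reindex_coordinate_index with plain
# numpy (start_index=0): np.unique's inverse array is exactly the re-indexing.
import numpy as np

cindex = np.array([4, 2, 1, 2, 1, 4, 1, 4, 2, 5, 6, 7])
u, new_cindex = np.unique(cindex, return_inverse=True)
assert (u[new_cindex] == cindex).all()  # the new index round-trips to the original
print(u)           # [1 2 4 5 6 7] -> unique indices into coordinate storage
print(new_cindex)  # [2 1 0 1 0 2 0 2 1 3 4 5]
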
def variable_gather(variable, root=0):
    from ocgis import vm

    if variable.is_empty:
        raise ValueError('No empty variables allowed.')

    if vm.size > 1:
        if vm.rank == root:
            new_variable = variable.copy()
            new_variable.dtype = variable.dtype
            new_variable._mask = None
            new_variable._value = None
            new_variable._dimensions = None
            assert not new_variable.has_allocated_value
        else:
            new_variable = None
    else:
        new_variable = variable
        for dim in new_variable.dimensions:
            dim.dist = False

    if vm.size > 1:
        if vm.rank == root:
            new_dimensions = [None] * variable.ndim
        else:
            new_dimensions = None

        for idx, dim in enumerate(variable.dimensions):
            if dim.dist:
                parts = vm.gather(dim)
            if vm.rank == root:
                new_dim = dim.copy()
                if dim.dist:
                    new_size = 0
                    has_src_idx = False
                    for part in parts:
                        has_src_idx = part._src_idx is not None
                        new_size += len(part)
                    if has_src_idx:
                        if part._src_idx_type == SourceIndexType.FANCY:
                            new_src_idx = np.zeros(new_size, dtype=DataType.DIMENSION_SRC_INDEX)
                            for part in parts:
                                new_src_idx[part.bounds_local[0]: part.bounds_local[1]] = part._src_idx
                        else:
                            part_bounds = [None] * len(parts)
                            for idx2, part in enumerate(parts):
                                part_bounds[idx2] = part._src_idx
                            part_bounds = np.array(part_bounds)
                            new_src_idx = (part_bounds.min(), part_bounds.max())
                    else:
                        new_src_idx = None
                    new_dim = dim.copy()
                    new_dim.set_size(new_size, src_idx=new_src_idx)
                    new_dim.dist = False
                new_dimensions[idx] = new_dim

        if vm.rank == root:
            new_variable.set_dimensions(new_dimensions, force=True)
        gathered_variables = vm.gather(variable)

        if vm.rank == root:
            for idx, gv in enumerate(gathered_variables):
                destination_slice = [slice(*dim.bounds_local) for dim in gv.dimensions]
                new_variable.__setitem__(destination_slice, gv)
            return new_variable
        else:
            return
    else:
        return new_variable

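# A usage sketch for variable_gather (hypothetical distributed ocgis Variable
# `var`): the root rank receives the full variable with its dimensions no longer
# distributed; every other rank receives None.
#
#     gathered = variable_gather(var)  # collective across the current VM
#     if vm.rank == 0:
#         print(gathered.shape)        # the global shape
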
def test_get_intersects(self):
    self.add_barrier = False
    subset_geom = self.fixture_subset_geom()
    poly = self.fixture()

    dist = OcgDist()
    for d in poly.parent.dimensions.values():
        d = d.copy()
        if d.name == poly.dimensions[0].name:
            d.dist = True
        dist.add_dimension(d)
    dist.update_dimension_bounds()

    poly.parent = variable_collection_scatter(poly.parent, dist)

    vm.create_subcomm_by_emptyable('scatter', poly, is_current=True)
    if vm.is_null:
        return

    poly.parent._validate_()

    for v in poly.parent.values():
        self.assertEqual(id(v.parent), id(poly.parent))
        self.assertEqual(len(v.parent), len(poly.parent))

    sub = poly.get_intersects(subset_geom)
    vm.create_subcomm_by_emptyable('after intersects', sub, is_current=True)
    if vm.is_null:
        return

    actual = []
    for g in sub.iter_geometries():
        if g[1] is not None:
            actual.append([g[1].centroid.x, g[1].centroid.y])

    desired = [[20.0, -49.5], [10.0, -44.5], [10.0, -39.5]]
    actual = vm.gather(actual)
    if vm.rank == 0:
        gactual = []
        for a in actual:
            for ia in a:
                gactual.append(ia)
        self.assertEqual(gactual, desired)

    self.assertEqual(len(sub.parent), len(poly.parent))

    sub.parent._validate_()
    sub2 = sub.reduce_global()
    sub2.parent._validate_()

    # Gather then broadcast coordinates so all coordinates are available on each process.
    to_add = []
    for gather_target in [sub2.x, sub2.y]:
        gathered = variable_gather(gather_target.extract())
        gathered = vm.bcast(gathered)
        to_add.append(gathered)
    for t in to_add:
        sub2.parent.add_variable(t, force=True)

    for ctr, to_check in enumerate([sub, sub2]):
        actual = []
        for g in to_check.iter_geometries():
            if g[1] is not None:
                actual.append([g[1].centroid.x, g[1].centroid.y])

        desired = [[20.0, -49.5], [10.0, -44.5], [10.0, -39.5]]
        actual = vm.gather(actual)
        if vm.rank == 0:
            gactual = []
            for a in actual:
                for ia in a:
                    gactual.append(ia)
            self.assertEqual(gactual, desired)

def reduce_reindex_coordinate_variables(cindex, start_index=0):
    """
    Reindex a subset of global coordinate indices contained in the ``cindex`` variable. The coordinate values
    referenced by ``cindex`` may then be reduced to only those coordinates required by the indices.

    The starting index value (``0`` or ``1``) is set by ``start_index`` for the re-indexing procedure.

    Function will not respect masks.

    The function returns a two-element tuple:

     * First element --> A :class:`numpy.ndarray` with the same dimension as ``cindex`` containing the new indexing.
     * Second element --> A :class:`numpy.ndarray` containing the unique indices that may be used to reduce an
       external coordinate storage variable or array.

    :param cindex: A variable containing coordinate index integer values. This variable may be distributed. This may
     also be a NumPy array.
    :type cindex: :class:`~ocgis.Variable` | :class:`~numpy.ndarray`
    :param int start_index: The first index to use for the re-indexing of ``cindex``. This may be ``0`` or ``1``.
    :rtype: tuple
    """
    # Get the coordinate index values as a NumPy array.
    try:
        cindex = cindex.get_value()
    except AttributeError:
        # Assume this is already a NumPy array.
        pass

    # Create the unique coordinate index array.
    u = np.array(create_unique_global_array(cindex))

    # Holds re-indexed values.
    new_cindex = np.empty_like(cindex)
    # Caches the local re-indexing for the process.
    cache = {}
    # Increment the indexing values based on their presence in the cache.
    curr_idx = 0
    for idx, to_reindex in enumerate(u.flat):
        if to_reindex not in cache:
            cache[to_reindex] = curr_idx
            curr_idx += 1

    # MPI communication tags.
    tag_cache_create = MPITag.REINDEX_CACHE_CREATE
    tag_cache_get_recv = MPITag.REINDEX_CACHE_GET_RECV
    tag_cache_get_send = MPITag.REINDEX_CACHE_GET_SEND

    # This is the local offset to move sequentially across processes. If the local cache is empty, there is no
    # offsetting to move between tasks.
    if len(cache) > 0:
        offset = max(cache.values()) + 1
    else:
        offset = 0

    # Synchronize the processes with the appropriate local offset.
    for idx, rank in enumerate(vm.ranks):
        try:
            dest_rank = vm.ranks[idx + 1]
        except IndexError:
            break
        else:
            if vm.rank == rank:
                vm.comm.send(start_index + offset, dest=dest_rank, tag=tag_cache_create)
            elif vm.rank == dest_rank:
                offset_previous = vm.comm.recv(source=rank, tag=tag_cache_create)
                start_index = offset_previous
    vm.barrier()

    # Find any missing local coordinate indices that are not mapped by the local cache.
    is_missing = False
    is_missing_indices = []
    for idx, to_reindex in enumerate(cindex.flat):
        try:
            local_new_cindex = cache[to_reindex]
        except KeyError:
            is_missing = True
            is_missing_indices.append(idx)
        else:
            new_cindex[idx] = local_new_cindex + start_index

    # Check if there are any processors missing their new index values.
    is_missing_global = vm.gather(is_missing)
    if vm.rank == 0:
        is_missing_global = any(is_missing_global)
    is_missing_global = vm.bcast(is_missing_global)

    # Execute a search across the process caches for any missing coordinate index values.
    if is_missing_global:
        for rank in vm.ranks:
            is_missing_rank = vm.bcast(is_missing, root=rank)
            if is_missing_rank:
                n_missing = vm.bcast(len(is_missing_indices), root=rank)
                if vm.rank == rank:
                    for imi in is_missing_indices:
                        for subrank in vm.ranks:
                            if vm.rank != subrank:
                                vm.comm.send(cindex[imi], dest=subrank, tag=tag_cache_get_recv)
                                new_cindex_element = vm.comm.recv(source=subrank, tag=tag_cache_get_send)
                                if new_cindex_element is not None:
                                    new_cindex[imi] = new_cindex_element
                else:
                    for _ in range(n_missing):
                        curr_missing = vm.comm.recv(source=rank, tag=tag_cache_get_recv)
                        new_cindex_element = cache.get(curr_missing)
                        if new_cindex_element is not None:
                            new_cindex_element += start_index
                        vm.comm.send(new_cindex_element, dest=rank, tag=tag_cache_get_send)

    return new_cindex, u

def get_unioned(self, dimensions=None, union_dimension=None, spatial_average=None, root=0):
    """
    Unions _unmasked_ geometry objects. Collective across the current :class:`~ocgis.OcgVM`.
    """
    # TODO: optimize!

    # Get dimension names and lengths for the dimensions to union.
    if dimensions is None:
        dimensions = self.dimensions
    dimension_names = get_dimension_names(dimensions)
    dimension_lengths = [len(self.parent.dimensions[dn]) for dn in dimension_names]

    # Get the variables to spatially average.
    if spatial_average is not None:
        variable_names_to_weight = get_variable_names(spatial_average)
    else:
        variable_names_to_weight = []

    # Get the new dimensions for the geometry variable. The union dimension is always the last dimension.
    if union_dimension is None:
        from ocgis.variable.dimension import Dimension
        union_dimension = Dimension(constants.DimensionName.UNIONED_GEOMETRY, 1)
    new_dimensions = []
    for dim in self.dimensions:
        if dim.name not in dimension_names:
            new_dimensions.append(dim)
    new_dimensions.append(union_dimension)

    # Configure the return variable.
    ret = self.copy()
    if spatial_average is None:
        ret = ret.extract()
    ret.set_mask(None)
    ret.set_value(None)
    ret.set_dimensions(new_dimensions)
    ret.allocate_value()

    # Destination indices in the return variable are filled with non-masked, unioned geometries.
    for dst_indices in product(*[list(range(dl)) for dl in get_dimension_lengths(new_dimensions)]):
        dst_slc = {new_dimensions[ii].name: dst_indices[ii] for ii in range(len(new_dimensions))}

        # Select the geometries to union, skipping any masked geometries.
        to_union = deque()
        for indices in product(*[list(range(dl)) for dl in dimension_lengths]):
            dslc = {dimension_names[ii]: indices[ii] for ii in range(len(dimension_names))}
            sub = self[dslc]
            sub_mask = sub.get_mask()
            if sub_mask is None:
                to_union.append(sub.get_value().flatten()[0])
            else:
                if not sub_mask.flatten()[0]:
                    to_union.append(sub.get_value().flatten()[0])

        # Execute the union operation.
        processed_to_union = deque()
        for geom in to_union:
            if isinstance(geom, MultiPolygon) or isinstance(geom, MultiPoint):
                for element in geom:
                    processed_to_union.append(element)
            else:
                processed_to_union.append(geom)
        unioned = cascaded_union(processed_to_union)

        # Pull unioned geometries and union again for the final unioned geometry.
        if vm.size > 1:
            unioned_gathered = vm.gather(unioned)
            if vm.rank == root:
                unioned = cascaded_union(unioned_gathered)

        # Fill the return geometry variable value with the unioned geometry.
        to_fill = ret[dst_slc].get_value()
        to_fill[0] = unioned

    # Spatially average the shared dimensions.
    if spatial_average is not None:
        # Get source data to weight.
        for var_to_weight in filter(lambda ii: ii.name in variable_names_to_weight, list(self.parent.values())):
            # Holds sizes of dimensions to iterate. These dimensions are not squeezed by the weighted averaging.
            range_to_itr = []
            # Holds the names of dimensions to squeeze.
            names_to_itr = []
            # Dimension names that are squeezed. Also the dimensions for the weight matrix.
            names_to_slice_all = []
            for dn in var_to_weight.dimensions:
                if dn.name in self.dimension_names:
                    names_to_slice_all.append(dn.name)
                else:
                    range_to_itr.append(len(dn))
                    names_to_itr.append(dn.name)

            # Reference the weights on the source geometry variable.
            weights = self[{nsa: slice(None) for nsa in names_to_slice_all}].weights

            # Path if there are iteration dimensions. Checks for axes ordering in addition.
            if len(range_to_itr) > 0:
                # New dimensions for the spatially averaged variable. The unioned dimension is always last. Remove
                # the dimensions aggregated by the weighted average.
                new_dimensions = [dim for dim in var_to_weight.dimensions if dim.name not in dimension_names]
                new_dimensions.append(union_dimension)

                # Prepare the spatially averaged variable.
                target = ret.parent[var_to_weight.name]
                target.set_mask(None)
                target.set_value(None)
                target.set_dimensions(new_dimensions)
                target.allocate_value()

                # Swap weight axes to make sure they align with the target variable.
                swap_chain = get_swap_chain(dimension_names, names_to_slice_all)
                if len(swap_chain) > 0:
                    weights = weights.copy()
                for sc in swap_chain:
                    weights = weights.swapaxes(*sc)

                # The main weighting loop. Can get quite intensive with many, large iteration dimensions.
                len_names_to_itr = len(names_to_itr)
                slice_none = slice(None)
                squeeze_out = [ii for ii, dim in enumerate(var_to_weight.dimensions) if dim.name in names_to_itr]
                should_squeeze = True if len(squeeze_out) > 0 else False
                np_squeeze = np.squeeze
                np_atleast_1d = np.atleast_1d
                np_ma_average = np.ma.average
                for nonweighted_indices in product(*[list(range(ri)) for ri in range_to_itr]):
                    w_slc = {names_to_itr[ii]: nonweighted_indices[ii] for ii in range(len_names_to_itr)}
                    for nsa in names_to_slice_all:
                        w_slc[nsa] = slice_none
                    data_to_weight = var_to_weight[w_slc].get_masked_value()
                    if should_squeeze:
                        data_to_weight = np_squeeze(data_to_weight, axis=tuple(squeeze_out))
                    weighted_value = np_atleast_1d(np_ma_average(data_to_weight, weights=weights))
                    target[w_slc].get_value()[:] = weighted_value
            else:
                target_to_weight = var_to_weight.get_masked_value()
                # Sort to minimize floating point sum errors.
                target_to_weight = target_to_weight.flatten()
                weights = weights.flatten()
                sindices = np.argsort(target_to_weight)
                target_to_weight = target_to_weight[sindices]
                weights = weights[sindices]

                weighted_value = np.atleast_1d(np.ma.average(target_to_weight, weights=weights))
                target = ret.parent[var_to_weight.name]
                target.set_mask(None)
                target.set_value(None)
                target.set_dimensions(new_dimensions)
                target.set_value(weighted_value)

        # Collect areas of live ranks and convert to weights.
        if vm.size > 1:
            # If there is no area information (points, for example), we need to use counts.
            if ret.area.data[0].max() == 0:
                weight_or_proxy = float(self.size)
            else:
                weight_or_proxy = ret.area.data[0]

            if vm.rank != root:
                vm.comm.send(weight_or_proxy, dest=root)
            else:
                live_rank_areas = [weight_or_proxy]
                for tner in vm.ranks:
                    if tner != vm.rank:
                        recv_area = vm.comm.recv(source=tner)
                        live_rank_areas.append(recv_area)
                live_rank_areas = np.array(live_rank_areas)
                rank_weights = live_rank_areas / np.max(live_rank_areas)

            for var_to_weight in filter(lambda ii: ii.name in variable_names_to_weight, list(ret.parent.values())):
                dimensions_to_itr = [dim.name for dim in var_to_weight.dimensions
                                     if dim.name != union_dimension.name]
                slc = {union_dimension.name: 0}
                for idx_slc in var_to_weight.iter_dict_slices(dimensions=dimensions_to_itr):
                    idx_slc.update(slc)
                    to_weight = var_to_weight[idx_slc].get_value().flatten()[0]
                    if vm.rank == root:
                        collected_to_weight = [to_weight]
                    if not vm.rank == root:
                        vm.comm.send(to_weight, dest=root)
                    else:
                        for tner in vm.ranks:
                            if not tner == root:
                                recv_to_weight = vm.comm.recv(source=tner)
                                collected_to_weight.append(recv_to_weight)

                        # Sort to minimize floating point sum errors.
                        collected_to_weight = np.array(collected_to_weight)
                        sindices = np.argsort(collected_to_weight)
                        collected_to_weight = collected_to_weight[sindices]
                        rank_weights = rank_weights[sindices]

                        weighted = np.atleast_1d(np.ma.average(collected_to_weight, weights=rank_weights))
                        var_to_weight[idx_slc].get_value()[:] = weighted

    if vm.rank == root:
        return ret
    else:
        return

def test_system_spatial_averaging_through_operations_state_boundaries(self):
    if MPI_SIZE != 8:
        raise SkipTest('MPI_SIZE != 8')

    ntime = 3
    # Get the exact field value for the state's representative center.
    with vm.scoped([0]):
        if MPI_RANK == 0:
            states = RequestDataset(self.path_state_boundaries, driver='vector').get()
            states.update_crs(env.DEFAULT_COORDSYS)
            fill = np.zeros((states.geom.shape[0], 2))
            for idx, geom in enumerate(states.geom.get_value().flat):
                centroid = geom.centroid
                fill[idx, :] = centroid.x, centroid.y
            exact_states = create_exact_field_value(fill[:, 0], fill[:, 1])
            state_ugid = states['UGID'].get_value()
            area = states.geom.area

    keywords = {
        'spatial_operation': ['clip', 'intersects'],
        'aggregate': [True, False],
        'wrapped': [True, False],
        'output_format': [OutputFormatName.OCGIS, 'csv', 'csv-shp', 'shp'],
    }

    # total_iterations = len(list(self.iter_product_keywords(keywords)))
    for ctr, k in enumerate(self.iter_product_keywords(keywords)):
        # barrier_print(k)
        # if ctr % 1 == 0:
        #     if vm.is_root:
        #         print('Iteration {} of {}...'.format(ctr + 1, total_iterations))

        with vm.scoped([0]):
            if vm.is_root:
                grid = create_gridxy_global(resolution=1.0, dist=False, wrapped=k.wrapped)
                field = create_exact_field(grid, 'foo', ntime=ntime)
                path = self.get_temporary_file_path('foo.nc')
                field.write(path)
            else:
                path = None
        path = MPI_COMM.bcast(path)

        rd = RequestDataset(path)

        ops = OcgOperations(dataset=rd, geom='state_boundaries', spatial_operation=k.spatial_operation,
                            aggregate=k.aggregate, output_format=k.output_format, prefix=str(ctr),
                            # geom_select_uid=[8]
                            )
        ret = ops.execute()

        # Test area is preserved for a problem element during union. The union's geometry was not fully represented
        # in the output.
        if k.output_format == 'shp' and k.aggregate and k.spatial_operation == 'clip':
            with vm.scoped([0]):
                if vm.is_root:
                    inn = RequestDataset(ret).get()
                    inn_ugid_idx = np.where(inn['UGID'].get_value() == 8)[0][0]
                    ugid_idx = np.where(state_ugid == 8)[0][0]
                    self.assertAlmostEqual(inn.geom.get_value()[inn_ugid_idx].area, area[ugid_idx], places=2)

        # Test the overview geometry shapefile is written.
        if k.output_format == 'shp':
            directory = os.path.split(ret)[0]
            contents = os.listdir(directory)
            actual = ['_ugid.shp' in c for c in contents]
            self.assertTrue(any(actual))
        elif k.output_format == 'csv-shp':
            directory = os.path.split(ret)[0]
            directory = os.path.join(directory, 'shp')
            contents = os.listdir(directory)
            actual = ['_ugid.shp' in c for c in contents]
            self.assertTrue(any(actual))
            if not k.aggregate:
                actual = ['_gid.shp' in c for c in contents]
                self.assertTrue(any(actual))

        if k.output_format == OutputFormatName.OCGIS:
            geom_keys = ret.children.keys()
            all_geom_keys = vm.gather(np.array(geom_keys))
            if vm.is_root:
                all_geom_keys = hgather(all_geom_keys)
                self.assertEqual(len(np.unique(all_geom_keys)), 51)

            if k.aggregate:
                actual = Dict()
                for field, container in ret.iter_fields(yield_container=True):
                    if not field.is_empty:
                        ugid = container.geom.ugid.get_value()[0]
                        actual[ugid]['actual'] = field.data_variables[0].get_value()
                        actual[ugid]['area'] = container.geom.area[0]

                actual = vm.gather(actual)

                if vm.is_root:
                    actual = dgather(actual)

                    ares = []
                    actual_areas = []
                    for ugid_key, v in actual.items():
                        ugid_idx = np.where(state_ugid == ugid_key)[0][0]
                        desired = exact_states[ugid_idx]
                        actual_areas.append(v['area'])
                        for tidx in range(ntime):
                            are = np.abs((desired + ((tidx + 1) * 10)) - v['actual'][tidx, 0])
                            ares.append(are)

                    if k.spatial_operation == 'clip':
                        diff = np.abs(np.array(area) - np.array(actual_areas))
                        self.assertLess(np.max(diff), 1e-6)
                        self.assertLess(np.mean(diff), 1e-6)

                    # Test relative errors.
                    self.assertLess(np.max(ares), 0.031)
                    self.assertLess(np.mean(ares), 0.009)

def get_masking_slice(intersects_mask_value, target, apply_slice=True):
    """
    Collective!

    :param intersects_mask_value: The mask to use for creating the slice indices.
    :type intersects_mask_value: :class:`numpy.ndarray`, dtype=bool
    :param target: The target slicable object to slice.
    :param bool apply_slice: If ``True``, apply the slice.
    """
    raise_if_empty(target)

    if intersects_mask_value is None:
        local_slice = None
    else:
        if intersects_mask_value.all():
            local_slice = None
        elif not intersects_mask_value.any():
            shp = intersects_mask_value.shape
            local_slice = [(0, shp[0]), (0, shp[1])]
        else:
            _, local_slice = get_trimmed_array_by_mask(intersects_mask_value, return_adjustments=True)
            local_slice = [(l.start, l.stop) for l in local_slice]

    if local_slice is not None:
        offset_local_slice = [None] * len(local_slice)
        for idx in range(len(local_slice)):
            offset = target.dimensions[idx].bounds_local[0]
            offset_local_slice[idx] = (local_slice[idx][0] + offset, local_slice[idx][1] + offset)
    else:
        offset_local_slice = None

    gathered_offset_local_slices = vm.gather(offset_local_slice)
    if vm.rank == 0:
        gathered_offset_local_slices = [g for g in gathered_offset_local_slices if g is not None]
        if len(gathered_offset_local_slices) == 0:
            raise_empty_subset = True
        else:
            raise_empty_subset = False
            offset_array = np.array(gathered_offset_local_slices)
            global_slice = [None] * offset_array.shape[1]
            for idx in range(len(global_slice)):
                global_slice[idx] = (np.min(offset_array[:, idx, :]), np.max(offset_array[:, idx, :]))
    else:
        global_slice = None
        raise_empty_subset = None

    raise_empty_subset = vm.bcast(raise_empty_subset)
    if raise_empty_subset:
        raise EmptySubsetError

    global_slice = vm.bcast(global_slice)
    global_slice = tuple([slice(g[0], g[1]) for g in global_slice])

    intersects_mask = Variable(name='mask_gather', value=intersects_mask_value, dimensions=target.dimensions,
                               dtype=bool)

    if apply_slice:
        if vm.size_global > 1:
            ret = target.get_distributed_slice(global_slice)
            ret_mask = intersects_mask.get_distributed_slice(global_slice)
        else:
            ret = target.__getitem__(global_slice)
            ret_mask = intersects_mask.__getitem__(global_slice)
    else:
        ret = target
        ret_mask = intersects_mask

    return ret, ret_mask, global_slice

def test_system_converting_state_boundaries_shapefile(self):
    ocgis.env.USE_NETCDF4_MPI = False  # tdk:FIX: this hangs in the STATE_FIPS write for asynch might be nc4 bug...
    keywords = {'transform_to_crs': [None, Spherical],
                'use_geometry_iterator': [False, True]}
    actual_xsums = []
    actual_ysums = []
    for k in self.iter_product_keywords(keywords):
        if k.use_geometry_iterator and k.transform_to_crs is not None:
            to_crs = k.transform_to_crs()
        else:
            to_crs = None
        if k.transform_to_crs is None:
            desired_crs = WGS84()
        else:
            desired_crs = k.transform_to_crs()

        rd = RequestDataset(uri=self.path_state_boundaries)
        rd.metadata['schema']['geometry'] = 'MultiPolygon'
        field = rd.get()

        # Test there is no mask present.
        field.geom.load()
        self.assertFalse(field.geom.has_mask)
        self.assertNotIn(VariableName.SPATIAL_MASK, field)
        self.assertIsNone(field.dimension_map.get_spatial_mask())

        self.assertEqual(field.crs, WGS84())
        if k.transform_to_crs is not None:
            field.update_crs(desired_crs)

        try:
            gc = field.geom.convert_to(pack=False, use_geometry_iterator=k.use_geometry_iterator, to_crs=to_crs)
        except ValueError as e:
            try:
                self.assertFalse(k.use_geometry_iterator)
                self.assertIsNotNone(to_crs)
            except AssertionError:
                raise e
            else:
                continue

        actual_xsums.append(gc.x.get_value().sum())
        actual_ysums.append(gc.y.get_value().sum())
        self.assertEqual(gc.crs, desired_crs)

        # Test there is no mask present after conversion to geometry coordinates.
        self.assertFalse(gc.has_mask)
        self.assertNotIn(VariableName.SPATIAL_MASK, gc.parent)
        self.assertIsNone(gc.dimension_map.get_spatial_mask())

        for v in list(field.values()):
            if v.name != field.geom.name:
                gc.parent.add_variable(v.extract(), force=True)

        path = self.get_temporary_file_path('esmf_state_boundaries.nc')
        self.assertEqual(gc.parent.crs, desired_crs)
        gc.parent.write(path, driver=DriverKey.NETCDF_ESMF_UNSTRUCT)

        gathered_geoms = vm.gather(field.geom.get_value())
        if vm.rank == 0:
            actual_geoms = []
            for g in gathered_geoms:
                actual_geoms.extend(g)

            rd = RequestDataset(path, driver=DriverKey.NETCDF_ESMF_UNSTRUCT)
            infield = rd.get()
            self.assertEqual(create_crs(infield.crs.value), desired_crs)
            for dv in field.data_variables:
                self.assertIn(dv.name, infield)
            ingrid = infield.grid
            self.assertIsInstance(ingrid, GridUnstruct)

            for g in ingrid.archetype.iter_geometries():
                self.assertPolygonSimilar(g[1], actual_geoms[g[0]], check_type=False)

        vm.barrier()

        # Test coordinates have actually changed.
        if not k.use_geometry_iterator:
            for ctr, to_test in enumerate([actual_xsums, actual_ysums]):
                for lhs, rhs in itertools.combinations(to_test, 2):
                    if ctr == 0:
                        self.assertAlmostEqual(lhs, rhs)
                    else:
                        self.assertNotAlmostEqual(lhs, rhs)