def reduce_global(self):
    """
    Collapse the element node connectivity to its unique nodes and renumber it.

    The coordinate index is de-duplicated and reindexed starting at ``self.start_index``. The operation is
    collective across the current VM and the replacement node dimension is distributed.

    :return: A shallow copy of ``self`` attached to the reduced parent collection and the new coordinate index.
    :rtype: :class:`~ocgis.spatial.geomc.AbstractGeometryCoordinates`
    :raises ValueError: If the object has no coordinate index.
    """
    raise_if_empty(self)

    if self.cindex is None:
        raise ValueError('A coordinate index is required to reduce coordinates.')

    reindexed, unique_idx = reduce_reindex_coordinate_index(self.cindex, start_index=self.start_index)
    reindexed_var = Variable(name=self.cindex.name, value=reindexed, dimensions=self.cindex.dimensions)

    result = self.copy()

    # Slice the x-coordinate with the unique indices and work with the parent collection of that slice.
    reduced_parent = self.x[unique_idx].parent

    # Swap the sliced node dimension for a distributed dimension with the same name, size and source index.
    node_dim = reduced_parent[self.x.name].dimensions[0]
    reduced_parent.dimensions[node_dim.name] = create_distributed_dimension(node_dim.size, name=node_dim.name,
                                                                            src_idx=node_dim._src_idx)

    # Pull the old coordinate index out of the reduced collection before attaching the reindexed variable.
    reduced_parent[self.cindex.name].extract(clean_break=True)
    result._cindex_name = None
    result.parent = reduced_parent
    result.cindex = reindexed_var

    return result
def test_create_distributed_dimension_with_empty(self):
    """A rank contributing a zero-length size gets an empty dimension; the other ranks do not."""
    if vm.size != 3:
        raise SkipTest('vm.size != 3')

    # Local dimension size keyed by rank. Rank 1 has nothing.
    local_sizes = {0: 3, 1: 0, 2: 8}
    dim = create_distributed_dimension(local_sizes[vm.rank], name='hello')

    check = self.assertTrue if vm.rank == 1 else self.assertFalse
    check(dim.is_empty)
def test_create_distributed_dimension(self):
    """Two ranks with local sizes 2 and 4 produce a distributed dimension of global size 6."""
    if vm.size != 2:
        raise SkipTest('vm.size != 2')

    local_size = 2 if vm.rank == 0 else 4
    dim = create_distributed_dimension(local_size, name='the_dist')

    self.assertTrue(dim.dist)
    self.assertEqual(dim.size_global, 6)
    self.assertEqual(dim.size, local_size)

    # Expected local bounds keyed by rank.
    expected_bounds = {0: (0, 2), 1: (2, 6)}
    self.assertEqual(expected_bounds[vm.rank], dim.bounds_local)
def _get_field_write_target_(cls, field):
    """
    Convert field data from the OCGIS unstructured format (similar to UGRID) to the layout expected by ESMF
    Unstructured metadata.

    :param field: The field to convert. Its dimension map driver must be NetCDF UGRID and its grid must be
     two-dimensional.
    :return: A variable collection holding the ``elementConn``, ``numElementConn`` and ``nodeCoords`` variables
     along with the remaining variables of the field.
    """
    # The driver for the current field must be NetCDF UGRID to ensure interpretability.
    assert field.dimension_map.get_driver() == DriverKey.NETCDF_UGRID
    grid = field.grid
    # Three-dimensional data is not supported.
    assert not grid.has_z

    # Number of coordinate dimension. This will be 3 for three-dimensional data.
    coord_dim = Dimension('coordDim', 2)

    # Transform ragged array to one-dimensional array. #############################################################

    cindex = grid.cindex
    ragged = cindex.get_value()
    nodes_per_element = [element.shape[0] for element in ragged.flat]
    flat_conn = np.zeros(sum(nodes_per_element), dtype=ragged[0].dtype)
    tag = MPITag.START_INDEX

    # Each rank needs the offset to add to its node indices. The offset is passed along the ranks in order: a rank
    # receives the running offset from the previous rank and forwards it, increased by its own maximum index plus
    # one, to the next rank.
    if vm.size > 1:
        local_next = max(element.max() for element in ragged.flat) + 1
        if vm.rank == 0:
            vm.comm.isend(local_next, dest=1, tag=tag)
            adjust = 0
        else:
            request = vm.comm.irecv(source=vm.rank - 1, tag=tag)
            adjust = request.wait()
            if vm.rank != vm.size - 1:
                vm.comm.isend(local_next + adjust, dest=vm.rank + 1, tag=tag)

    # Fill the new vector for the element connectivity. ############################################################

    offset = 0
    for nodes in ragged.flat:
        if vm.size > 1:
            # Multi-geometry break values are markers, not node indices, so they must survive the offset shift.
            if grid.archetype.has_multi:
                break_value = cindex.attrs[OcgisConvention.Name.MULTI_BREAK_VALUE]
                break_positions = np.where(nodes == break_value)[0]
            else:
                break_positions = []
            nodes = nodes + adjust
            if len(break_positions) > 0:
                nodes[break_positions] = break_value
        stop = offset + nodes.shape[0]
        flat_conn[offset:stop] = nodes
        offset = stop

    # Create the new data representation. ##########################################################################

    conn_dim = create_distributed_dimension(flat_conn.size, name='connectionCount')
    conn_var = Variable(name='elementConn', value=flat_conn, dimensions=conn_dim)
    conn_var.attrs[CFName.LONG_NAME] = 'Node indices that define the element connectivity.'
    mbv = cindex.attrs.get(OcgisConvention.Name.MULTI_BREAK_VALUE)
    if mbv is not None:
        conn_var.attrs['polygon_break_value'] = mbv
    conn_var.attrs['start_index'] = grid.start_index

    ret = VariableCollection(variables=field.copy().values(), force=True)

    # Rename the element count dimension.
    element_dim_name = ret[cindex.name].dimensions[0].name
    ret.rename_dimension(element_dim_name, 'elementCount')

    # Add the element-node connectivity variable to the collection.
    ret.add_variable(conn_var)

    count_var = Variable(name='numElementConn', value=nodes_per_element, dimensions=cindex.dimensions[0],
                         attrs={CFName.LONG_NAME: 'Number of nodes per element.'})
    ret.add_variable(count_var)

    # Pack the x and y node coordinates into the two columns of a single variable.
    node_coords = Variable(name='nodeCoords', dimensions=(grid.node_dim, coord_dim))
    node_coords.units = 'degrees'
    node_coords.attrs[CFName.LONG_NAME] = 'Node coordinate values indexed by element connectivity.'
    node_coords.attrs['coordinates'] = 'x y'
    packed = node_coords.get_value()
    packed[:, 0] = grid.x.get_value()
    packed[:, 1] = grid.y.get_value()

    # The separate coordinate variables are replaced by the packed variable.
    ret.pop(grid.x.name)
    ret.pop(grid.y.name)
    ret.add_variable(node_coords)

    ret.attrs['gridType'] = 'unstructured'
    ret.attrs['version'] = '0.9'

    return ret
def reduce_reindex_coordinate_index(cindex, start_index=0):
    """
    Reindex a subset of global coordinate indices contained in the ``cindex`` variable. The starting index value
    (``0`` or ``1``) is set by ``start_index`` for the re-indexing procedure.

    Function will not respect masks.

    The function calls collective operations on the current VM (broadcasts, gathers and barriers), so every rank of
    the VM is expected to call it.

    The function returns a two-element tuple:

     * First element --> A :class:`numpy.ndarray` with the same shape as ``cindex`` containing the new indexing.
     * Second element --> A :class:`numpy.ndarray` containing the unique indices that may be used to reduce an
       external coordinate storage variable or array.

    :param cindex: A variable containing coordinate index integer values. This variable may be distributed. This may
     also be a NumPy array.
    :type cindex: :class:`~ocgis.Variable` | :class:`~numpy.ndarray`
    :param int start_index: The first index to use for the re-indexing of ``cindex``. This may be ``0`` or ``1``.
    :rtype: tuple
    """

    # Get the coordinate index values as a NumPy array.
    try:
        cindex = cindex.get_value()
    except AttributeError:
        # Assume this is already a NumPy array.
        pass

    # Only work with 1D arrays. The original shape is remembered so the result can be reshaped before returning.
    cindex = np.atleast_1d(cindex)
    original_shape = cindex.shape
    cindex = cindex.flatten()

    # Create the unique coordinate index array.
    # NOTE(review): presumably the returned values are unique across ranks, not just locally - confirm against
    # create_unique_global_array.
    u = np.array(create_unique_global_array(cindex))

    # Synchronize the data type for the new coordinate index. The data type found on rank 0 is used everywhere.
    lrank = vm.rank
    if lrank == 0:
        dtype = u.dtype
    else:
        dtype = None
    dtype = vm.bcast(dtype)

    # Flag to indicate if the current rank has any unique values.
    has_u = len(u) > 0

    # Create the new coordinate index. The distributed dimension is sized by the local count of unique values and
    # the new index values are generated from that dimension beginning at "start_index".
    new_u_dimension = create_distributed_dimension(len(u), name='__new_u_dimension__')
    new_u = arange_from_dimension(new_u_dimension, start=start_index, dtype=dtype)

    # Create a hash for the new index. This is used to remap the old coordinate index. Keys are the old index
    # values and values are the new index values.
    if has_u:
        uidx = {ii: jj for ii, jj in zip(u, new_u)}
    else:
        uidx = None

    vm.barrier()

    # Construct local bounds for the rank's unique value. This is used as a cheap index when ranks are looking for
    # index overlaps.
    if has_u:
        local_bounds = min(u), max(u)
    else:
        local_bounds = None

    # Put a copy for the bounds indexing on each rank.
    lb_global = vm.gather(local_bounds)
    lb_global = vm.bcast(lb_global)

    # Find the vm ranks the local rank cares about. It cares if any of its coordinate index values fall within the
    # unique value bounds of another rank. Ranks without unique values (bounds of None) are never of interest.
    overlaps = []
    for rank, lb in enumerate(lb_global):
        if rank == lrank:
            continue
        if lb is not None:
            contains = lb[0] <= cindex
            contains = np.logical_and(lb[1] >= cindex, contains)
            if np.any(contains):
                overlaps.append(rank)

    # Ranks must be able to identify which ranks will be asking them for data.
    global_overlaps = vm.gather(overlaps)
    global_overlaps = vm.bcast(global_overlaps)
    destinations = [ii for ii, jj in enumerate(global_overlaps) if vm.rank in jj]

    # MPI communication tags used in the algorithm.
    tag_search = MPITag.REDUCE_REINDEX_SEARCH
    tag_success = MPITag.REDUCE_REINDEX_SUCCESS
    tag_child_finished = MPITag.REDUCE_REINDEX_CHILD_FINISHED
    tag_found = MPITag.REDUCE_REINDEX_FOUND

    # Fill array for the new coordinate index.
    new_cindex = np.empty_like(cindex)

    # Fill the new coordinate indexing. The root rank and the other ranks run different routines.
    if lrank == 0:
        run_rr_root(new_cindex, cindex, uidx, destinations, tag_child_finished, tag_found, tag_search, tag_success)
    else:
        run_rr_nonroot(new_cindex, cindex, uidx, destinations, has_u, overlaps, tag_child_finished, tag_found,
                       tag_search, tag_success)

    # Return array to its original shape.
    new_cindex = new_cindex.reshape(*original_shape)

    vm.barrier()

    return new_cindex, u
def get_spatial_subset_operation(self, spatial_op, subset_geom, return_slice=False, original_mask=None,
                                 keep_touches=True, cascade=True, optimized_bbox_subset=False, apply_slice=True,
                                 geom_name=None):
    """
    Perform intersects or intersection operations on the object.

    :param str spatial_op: Either an ``'intersects'`` or an ``'intersection'`` spatial operation. Any value other
     than ``'intersection'`` is handled as an intersects operation.
    :param subset_geom: A scalar (single geometry) geometry variable or Shapely geometry to use in the spatial
     operation. All geometry types are accepted.
    :type subset_geom: :class:`~ocgis.GeometryVariable` | :class:`shapely.geometry.base.BaseGeometry`
    :param bool return_slice: If ``True``, also return the slices used to limit the grid's extent.
    :param original_mask: An optional mask to use as a hint for spatial operation. ``True`` values are excluded from
     spatial consideration. When provided and not replaced internally, this array is updated in-place with the
     results of the intersects tests.
    :type original_mask: :class:`numpy.ndarray`
    :param keep_touches: If ``True`` (the default), keep geometries that touch the subset geometry.
    :type keep_touches: :class:`bool`
    :param cascade: If ``True`` (the default), set the mask across all variables in the grid's parent collection.
    :param optimized_bbox_subset: If ``True``, perform an optimized bounding box subset on the grid. This will only
     use the grid's representative coordinates ignoring bounds, geometries, etc.
    :param bool apply_slice: If ``True`` (the default), apply the slice to the grid object in addition to updating
     its mask.
    :param str geom_name: If provided, use this name for the output geometry variable if this is an intersection
     operation.
    :return: If ``return_slice`` is ``False`` (the default), return a shallow copy of the sliced grid. If
     ``return_slice`` is ``True``, this will be a tuple with the subsetted object as the first element and the slice
     used as the second. If ``spatial_op`` is ``'intersection'``, the returned object is a geometry variable.
    :rtype: :class:`~ocgis.Grid` | :class:`~ocgis.GeometryVariable` | :class:`tuple` of
     ``(<returned object>, <slice used>)``
    :raises ValueError: If ``optimized_bbox_subset`` is ``True`` for an intersection operation or if ``subset_geom``
     is neither a prepared-capable object nor a Shapely geometry.
    """

    # TODO: Merge this with the grid's spatial operation.
    if optimized_bbox_subset and spatial_op == 'intersection':
        raise ValueError("'optimized_bbox_subset' must be False when performing an intersection")

    raise_if_empty(self)

    # Objects exposing "prepare" are prepared and reduced to their first geometry value. Anything else must already
    # be a Shapely geometry.
    try:
        subset_geom = subset_geom.prepare()
    except AttributeError:
        if not isinstance(subset_geom, BaseGeometry):
            msg = 'Only Shapely geometries allowed for subsetting. Subset type is "{}".'.format(
                type(subset_geom))
            raise ValueError(msg)
    else:
        subset_geom = subset_geom.get_value()[0]

    # Remember if the object came in with a mask. The mask is copied on the returned object further below.
    if self.get_mask() is None:
        original_has_mask = False
    else:
        original_has_mask = True

    if geom_name is None:
        geom_name = get_default_geometry_variable_name(self)

    if spatial_op == 'intersection':
        perform_intersection = True
    else:
        perform_intersection = False

    if original_mask is None and self.__use_bounds_intersects_optimizations__:
        # Build a hint mask from the bounding box of each part of the subset geometry. Selections for the parts are
        # combined with a logical "or".
        if isinstance(subset_geom, BaseMultipartGeometry):
            geom_itr = subset_geom
        else:
            geom_itr = [subset_geom]

        x = self.x.get_value()
        y = self.y.get_value()
        if self.has_z:
            z = self.z.get_value()
        else:
            z = None

        for ctr, geom in enumerate(geom_itr):
            if geom.has_z:
                # The z-bounds come from the third column of the exterior ring coordinates.
                coords = np.array(geom.exterior.coords)
                z_coords = coords[:, 2]
                z_bounds = z_coords.min(), z_coords.max()
            else:
                z_bounds = None

            single_hint_mask = get_xyz_select(x, y, geom.bounds, z=z, z_bounds=z_bounds, keep_touches=keep_touches)

            if ctr == 0:
                hint_mask = single_hint_mask
            else:
                hint_mask = np.logical_or(hint_mask, single_hint_mask)

        # The combined selection is inverted so that True marks elements excluded from consideration.
        hint_mask = np.invert(hint_mask)

        original_mask = hint_mask
        if not optimized_bbox_subset:
            # Elements already masked on the object are excluded as well.
            mask = self.get_mask()
            if mask is not None:
                original_mask = np.logical_or(mask, hint_mask)
    elif not self.__use_bounds_intersects_optimizations__:
        # Without the bounds optimization every element is considered. Note this branch also replaces a mask
        # passed in by the caller.
        original_mask = np.zeros(self.element_dim.size, dtype=bool)

    ret = self.copy()
    if original_has_mask:
        ret.set_mask(ret.get_mask().copy())

    if optimized_bbox_subset:
        the_slice = np.invert(original_mask)
        sliced_obj = ret.get_distributed_slice(the_slice)
    else:
        # "fill_mask" is the same array object as "original_mask" and is updated in-place below.
        fill_mask = original_mask
        geometry_fill = None
        # If everything is masked, there is no reason to load the grid geometries.
        if not original_mask.all():
            if perform_intersection:
                geometry_fill = np.zeros(fill_mask.shape, dtype=object)
            gp = GeometryProcessor(self.get_geometry_iterable(hint_mask=original_mask), subset_geom,
                                   keep_touches=keep_touches)
            for idx, intersects_logical, current_geometry in gp.iter_intersects():
                # Elements that do not intersect the subset geometry are masked.
                fill_mask[idx] = not intersects_logical
                if perform_intersection and intersects_logical:
                    geometry_fill[idx] = current_geometry.intersection(subset_geom)

        if perform_intersection:
            # An empty geometry variable is created when no geometries were loaded.
            if geometry_fill is None:
                geometry_variable = GeometryVariable(name=geom_name)
            else:
                geometry_variable = GeometryVariable(name=geom_name, value=geometry_fill, mask=fill_mask,
                                                     dimensions=ret.element_dim)
            ret.parent.add_variable(geometry_variable, force=True)

        the_slice = np.invert(fill_mask)
        if apply_slice:
            sliced_obj = ret.get_distributed_slice(the_slice)
        else:
            sliced_obj = ret
        sliced_mask_value = sliced_obj.get_mask()

        # Only modify the outgoing mask if any values are masked.
        if sliced_mask_value is not None and sliced_mask_value.any():
            sliced_obj.set_mask(sliced_mask_value, cascade=cascade)

    # The element dimension needs to be updated to account for fancy slicing which may leave some ranks empty.
    new_element_dimension_name = self.element_dim.name
    if sliced_obj.is_empty:
        new_element_dimension_size = 0
        new_element_dimension_src_idx = None
    else:
        element_dim = sliced_obj.element_dim
        new_element_dimension_size = element_dim.size
        new_element_dimension_src_idx = element_dim._src_idx
    new_element_dimension = create_distributed_dimension(new_element_dimension_size,
                                                         name=new_element_dimension_name,
                                                         src_idx=new_element_dimension_src_idx)
    sliced_obj.parent.dimensions[new_element_dimension.name] = new_element_dimension

    # For an intersection, the geometry variable is retrieved from the sliced parent collection.
    if perform_intersection:
        obj_to_ret = sliced_obj.parent[geometry_variable.name]
    else:
        obj_to_ret = sliced_obj

    if return_slice:
        ret = (obj_to_ret, the_slice)
    else:
        ret = obj_to_ret

    return ret
def get_subset(bbox, subset_filename, buffer_width, rhs_tol=10.):
    """
    Subset the mesh stored at ``IN_PATH`` by a buffered bounding box and write the subset to ``subset_filename``.

    Faces are selected by testing their center coordinates against the buffered bounding box. The nodes of the
    selected faces are extracted and the face-node connectivity is renumbered before writing.

    :param bbox: The bounding box passed to :func:`shapely.geometry.box` as ``(minx, miny, maxx, maxy)``.
    :param str subset_filename: Path of the file to write the subset to.
    :param float buffer_width: Width used to buffer the bounding box before selecting face centers.
    :param float rhs_tol: Amount added to ``360.`` for the upper x-bound of the second bounding box created when the
     buffered bounding box has a negative lower x-bound.
    :return: ``True`` if the current rank is part of the scoped "live ranks" VM and wrote subset data, ``False`` if
     the VM is null for the rank.
    :rtype: bool
    """
    from ocgis.vmachine.mpi import rank_print

    rd = ocgis.RequestDataset(uri=IN_PATH)
    rd.metadata['dimensions']['nlandmesh_face']['dist'] = True
    vc = rd.get_raw_field()

    # ------------------------------------------------------------------------------------------------------------------
    # Subset the face centers and accumulate the indices of face centers occurring inside the bounding box selection.

    start_index = vc[MESHVAR].attrs.get('start_index', 0)

    # Face center coordinates tested against the bounding boxes.
    px = vc[FACE_CENTER_X].extract().get_value()
    py = vc[FACE_CENTER_Y].extract().get_value()

    # Handle bounding box wrapping. This requires creating two bounding boxes to capture the left and right side of the
    # sphere.
    buffered_bbox = box(*bbox).buffer(buffer_width).envelope.bounds
    if buffered_bbox[0] < 0:
        bbox_rhs = list(buffered_bbox)
        bbox_rhs[0] = buffered_bbox[0] + 360.
        bbox_rhs[2] = 360. + rhs_tol
        bboxes = [buffered_bbox, bbox_rhs]
    else:
        bboxes = [buffered_bbox]

    # The selection from the previous bounding box seeds the selection for the next one.
    select = None
    for curr_bbox in bboxes:
        select = create_boolean_select_array(curr_bbox, px, py, initial=select)

    # ------------------------------------------------------------------------------------------------------------------
    # Use the selection criteria to extract associated nodes and reindex the new coordinate arrays.

    # Retrieve the live ranks following the subset.
    has_select = ocgis.vm.gather(select.any())
    if ocgis.vm.rank == 0:
        live_ranks = np.array(ocgis.vm.ranks)[has_select]
    else:
        live_ranks = None
    live_ranks = ocgis.vm.bcast(live_ranks)

    with ocgis.vm.scoped('live ranks', live_ranks):
        if not ocgis.vm.is_null:
            has_subset = True

            rank_print('live ranks:', ocgis.vm.ranks)

            sub = vc[FACE_NODE].get_distributed_slice([select, slice(None)]).parent

            cindex = sub[FACE_NODE]
            cindex_original_shape = cindex.shape
            cindex_value = cindex.get_value().flatten()

            # Shift to zero-based indices before using the connectivity values to index the coordinates.
            if start_index > 0:
                cindex_value -= start_index
            vc_coords = vc[XVAR][cindex_value].parent

            archetype_dim = vc_coords[XVAR].dimensions[0]

            # The coordinates were extracted in connectivity order, so the new connectivity is a simple range.
            arange_dimension = create_distributed_dimension(cindex_value.shape[0], name='arange_dim')
            new_cindex_value = arange_from_dimension(arange_dimension, start=start_index)

            cindex.set_value(new_cindex_value.reshape(*cindex_original_shape))

            new_vc_coords_dimension = create_distributed_dimension(vc_coords[XVAR].shape[0],
                                                                   name=archetype_dim.name,
                                                                   src_idx=archetype_dim._src_idx)
            vc_coords.dimensions[archetype_dim.name] = new_vc_coords_dimension

            # ------------------------------------------------------------------------------------------------------------------
            # Format the new variable collection and write out the new data file.

            # Remove old coordinate variables.
            for to_modify in [XVAR, YVAR]:
                sub[to_modify].extract(clean_break=True)

            for to_add in [XVAR, YVAR]:
                var_to_add = vc_coords[to_add].extract()
                sub.add_variable(var_to_add)

            rank_print('start sub.write')
            sub.write(subset_filename)
            rank_print('finished sub.write')

            if ocgis.vm.rank == 0:
                # A single pre-formatted argument prints identically with the Python 2 print statement and the
                # Python 3 print function.
                print('subset x extent: {}'.format(sub[FACE_CENTER_X].extent))
                print('subset y extent: {}'.format(sub[FACE_CENTER_Y].extent))
                ocgis.RequestDataset(subset_filename).inspect()
        else:
            has_subset = False

    return has_subset
def _convert_to_ugrid_(field):
    """
    Convert field data from the OCGIS unstructured format (similar to UGRID) to the layout expected by ESMF
    Unstructured metadata.

    :param field: The field to convert. Its dimension map driver must be NetCDF UGRID and its grid must be
     two-dimensional.
    :return: A variable collection holding the ``elementConn``, ``numElementConn`` and ``nodeCoords`` variables. The
     node dimension is named ``nodeCount`` and the coordinate index variable is removed.
    """
    # The driver for the current field must be NetCDF UGRID to ensure interpretability.
    assert field.dimension_map.get_driver() == DriverKey.NETCDF_UGRID
    grid = field.grid
    # Three-dimensional data is not supported.
    assert not grid.has_z

    # Number of coordinate dimension. This will be 3 for three-dimensional data.
    coord_dim = Dimension('coordDim', 2)

    # Transform ragged array to one-dimensional array. #############################################################

    cindex = grid.cindex
    ragged = cindex.get_value()
    node_counts = [element.shape[0] for element in ragged.flat]
    packed_conn = np.zeros(sum(node_counts), dtype=ragged[0].dtype)
    start_tag = MPITag.START_INDEX

    # Determine the offset to add to the node indices of this rank. The running offset is handed from rank to rank
    # in order, with each rank adding its own maximum index plus one before forwarding.
    if vm.size > 1:
        next_free = max(element.max() for element in ragged.flat) + 1
        if vm.rank == 0:
            vm.comm.isend(next_free, dest=1, tag=start_tag)
            adjust = 0
        else:
            pending = vm.comm.irecv(source=vm.rank - 1, tag=start_tag)
            adjust = pending.wait()
            if vm.rank != vm.size - 1:
                vm.comm.isend(next_free + adjust, dest=vm.rank + 1, tag=start_tag)

    # Fill the new vector for the element connectivity. ############################################################

    cursor = 0
    for node_idx in ragged.flat:
        if vm.size > 1:
            # Break values mark multi-geometry parts and must not be shifted with the node indices.
            if grid.archetype.has_multi:
                break_value = cindex.attrs[OcgisConvention.Name.MULTI_BREAK_VALUE]
                break_at = np.where(node_idx == break_value)[0]
            else:
                break_at = []
            node_idx = node_idx + adjust
            if len(break_at) > 0:
                node_idx[break_at] = break_value
        upper = cursor + node_idx.shape[0]
        packed_conn[cursor:upper] = node_idx
        cursor = upper

    # Create the new data representation. ##########################################################################

    conn_dim = create_distributed_dimension(packed_conn.size, name='connectionCount')
    conn_var = Variable(name='elementConn', value=packed_conn, dimensions=conn_dim, dtype=np.int32)
    conn_var.attrs[CFName.LONG_NAME] = 'Node indices that define the element connectivity.'
    mbv = cindex.attrs.get(OcgisConvention.Name.MULTI_BREAK_VALUE)
    if mbv is not None:
        conn_var.attrs['polygon_break_value'] = mbv
    conn_var.attrs['start_index'] = grid.start_index

    ret = VariableCollection(variables=field.copy().values(), force=True)

    # Rename the element count dimension.
    element_dim_name = ret[cindex.name].dimensions[0].name
    ret.rename_dimension(element_dim_name, 'elementCount')

    # Add the element-node connectivity variable to the collection.
    ret.add_variable(conn_var)

    count_var = Variable(name='numElementConn', value=node_counts, dimensions=cindex.dimensions[0],
                         attrs={CFName.LONG_NAME: 'Number of nodes per element.'}, dtype=np.int32)
    ret.add_variable(count_var)

    # Check that the node count dimension is appropriately named. The dimension is copied before it is renamed.
    node_dim_name = grid.node_dim.name
    if node_dim_name != 'nodeCount':
        ret.dimensions[node_dim_name] = ret.dimensions[node_dim_name].copy()
        ret.rename_dimension(node_dim_name, 'nodeCount')

    # Pack the x and y node coordinates into the two columns of a single variable.
    node_coords = Variable(name='nodeCoords', dimensions=(ret.dimensions['nodeCount'], coord_dim))
    node_coords.units = 'degrees'
    node_coords.attrs[CFName.LONG_NAME] = 'Node coordinate values indexed by element connectivity.'
    node_coords.attrs['coordinates'] = 'x y'
    packed_xy = node_coords.get_value()
    packed_xy[:, 0] = grid.x.get_value()
    packed_xy[:, 1] = grid.y.get_value()

    # The separate coordinate variables are replaced by the packed variable.
    ret.pop(grid.x.name)
    ret.pop(grid.y.name)
    ret.add_variable(node_coords)

    ret.attrs['gridType'] = 'unstructured'
    ret.attrs['version'] = '0.9'

    # Remove the coordinate index, this does not matter.
    if field.grid.cindex is not None:
        ret.remove_variable(field.grid.cindex.name)

    return ret