def _gc_iter_dst_grid_slices_(grid_chunker):
    """
    Yield boolean selection masks that split the destination grid into chunks.

    The unique y-coordinate values of the destination grid's point abstraction are collected on rank 0, sorted, and
    split into ``grid_chunker.nchunks_dst[0]`` groups. For each group, a boolean array with the same shape as the
    local y-coordinate values is yielded; an element is ``True`` when its y-coordinate equals one of the unique
    values in the group.

    Each iteration calls ``vm.bcast``, so presumably every rank must advance this generator in step with the
    others -- confirm against the ``vm`` implementation.

    :param grid_chunker: Object providing ``dst_grid.abstractions_available['point']`` and ``nchunks_dst``.
    :returns: Generator of boolean :class:`numpy.ndarray` selection masks, one per destination chunk.
    """
    # TODO: This method uses some global gathers which is not ideal.
    # Destination splitting works off center coordinates only.
    pgc = grid_chunker.dst_grid.abstractions_available['point']

    # Use the unique center values to break the grid into pieces. This ensures that nearby grid cells are close
    # spatially. If we just break the grid into pieces without using unique values, the points may be scattered
    # which does not optimize the spatial coverage of the source grid.
    center_lat = pgc.y.get_value()
    ucenter_lat = create_unique_global_array(center_lat)

    nchunks = grid_chunker.nchunks_dst[0]

    ucenter_lat = vm.gather(ucenter_lat)
    if vm.rank == 0:
        # Only rank 0 combines the gathered pieces into one sorted array and computes the splits.
        ucenter_lat = hgather(ucenter_lat)
        ucenter_lat.sort()
        ucenter_splits = np.array_split(ucenter_lat, nchunks)
    else:
        # Placeholders keep the loop length identical on every rank; the values arrive through the broadcast.
        ucenter_splits = [None] * nchunks

    for ucenter_split in ucenter_splits:
        ucenter_split = vm.bcast(ucenter_split)
        # One vectorized membership test replaces a Python loop that allocated a full-size boolean array for
        # every unique value. An empty split yields an all-False mask, as before.
        yield np.isin(center_lat, ucenter_split)
def reduce_reindex_coordinate_index(cindex, start_index=0):
    """
    Re-number a subset of global coordinate indices so they become a compact sequence.

    The values in ``cindex`` are replaced by new index values that begin at ``start_index`` (``0`` or ``1``).
    Masks are not respected.

    A two-element tuple is returned:

    * First element --> A :class:`numpy.ndarray` shaped like ``cindex`` (after promotion to at least one
      dimension) holding the new indexing.
    * Second element --> A :class:`numpy.ndarray` of the unique indices that may be used to reduce an external
      coordinate storage variable or array.

    :param cindex: Coordinate index integer values. May be a distributed variable or a NumPy array.
    :type cindex: :class:`~ocgis.Variable` | :class:`~numpy.ndarray`
    :param int start_index: First index value used for the re-indexing. This may be ``0`` or ``1``.
    :rtype: tuple
    """
    # Pull the raw values out of a variable-like object; anything without ``get_value`` is taken to be an array.
    try:
        cindex = cindex.get_value()
    except AttributeError:
        pass

    # Work on a flattened copy and remember the shape so the result can be restored at the end.
    cindex = np.atleast_1d(cindex)
    original_shape = cindex.shape
    cindex = cindex.flatten()

    # Unique coordinate index values held by this rank.
    u = np.array(create_unique_global_array(cindex))

    # Every rank adopts the data type reported by rank 0 for the new index.
    lrank = vm.rank
    dtype = vm.bcast(u.dtype if lrank == 0 else None)

    # Whether this rank holds any unique values at all.
    n_unique = len(u)
    has_u = n_unique > 0

    # Build the new index values for the unique values on this rank.
    new_u_dimension = create_distributed_dimension(n_unique, name='__new_u_dimension__')
    new_u = arange_from_dimension(new_u_dimension, start=start_index, dtype=dtype)

    # Lookup from old index value to new index value.
    uidx = dict(zip(u, new_u)) if has_u else None

    vm.barrier()

    # The (min, max) of the local unique values acts as a cheap filter when ranks look for index overlaps.
    local_bounds = (min(u), max(u)) if has_u else None
    lb_global = vm.bcast(vm.gather(local_bounds))

    # Collect the other ranks whose bounds enclose at least one local coordinate index value.
    overlaps = []
    for rank, lb in enumerate(lb_global):
        if rank == lrank or lb is None:
            continue
        lower, upper = lb
        within = np.logical_and(upper >= cindex, lower <= cindex)
        if np.any(within):
            overlaps.append(rank)

    # Each rank also needs to know which ranks list it as an overlap, i.e. who will ask it for data.
    global_overlaps = vm.bcast(vm.gather(overlaps))
    destinations = [other for other, wanted in enumerate(global_overlaps) if lrank in wanted]

    # MPI communication tags, in the positional order expected by the run functions:
    # child finished, found, search, success.
    tags = (MPITag.REDUCE_REINDEX_CHILD_FINISHED,
            MPITag.REDUCE_REINDEX_FOUND,
            MPITag.REDUCE_REINDEX_SEARCH,
            MPITag.REDUCE_REINDEX_SUCCESS)

    # Output buffer; the run functions are expected to fill it in place.
    new_cindex = np.empty_like(cindex)
    if lrank == 0:
        run_rr_root(new_cindex, cindex, uidx, destinations, *tags)
    else:
        run_rr_nonroot(new_cindex, cindex, uidx, destinations, has_u, overlaps, *tags)

    # Restore the shape of the incoming coordinate index.
    new_cindex = new_cindex.reshape(original_shape)

    vm.barrier()

    return new_cindex, u
def reduce_reindex_coordinate_variables(cindex, start_index=0):
    """
    Reindex a subset of global coordinate indices contained in the ``cindex`` variable. The starting index value
    (``0`` or ``1``) is set by ``start_index`` for the re-indexing procedure. Function will not respect masks.

    Each process numbers its own unique values locally, the numbering is offset sequentially from one rank to the
    next, and values that are not in the local cache are looked up in the caches of the other processes.

    The function returns a two-element tuple:

    * First element --> A :class:`numpy.ndarray` with the same shape as ``cindex`` containing the new indexing.
    * Second element --> A :class:`numpy.ndarray` containing the unique indices that may be used to reduce an
      external coordinate storage variable or array.

    :param cindex: A variable containing coordinate index integer values. This variable may be distributed. This
     may also be a NumPy array of any dimensionality.
    :type cindex: :class:`~ocgis.Variable` | :class:`~numpy.ndarray`
    :param int start_index: The first index to use for the re-indexing of ``cindex``. This may be ``0`` or ``1``.
    :rtype: tuple
    """
    # Get the coordinate index values as a NumPy array.
    try:
        cindex = cindex.get_value()
    except AttributeError:
        # Assume this is already a NumPy array.
        pass

    # Create the unique coordinate index array.
    u = np.array(create_unique_global_array(cindex))

    # Holds re-indexed values.
    new_cindex = np.empty_like(cindex)

    # Caches the local re-indexing for the process. Each previously unseen value receives the next sequential
    # local index, which always equals the current size of the cache.
    cache = {}
    for to_reindex in u.flat:
        if to_reindex not in cache:
            cache[to_reindex] = len(cache)

    # MPI communication tags.
    tag_cache_create = MPITag.REINDEX_CACHE_CREATE
    tag_cache_get_recv = MPITag.REINDEX_CACHE_GET_RECV
    tag_cache_get_send = MPITag.REINDEX_CACHE_GET_SEND

    # This is the local offset to move sequentially across processes. Local indices run from 0 to len(cache) - 1,
    # so the offset is the cache size. If the local cache is empty, there is no offsetting to move between tasks.
    offset = len(cache)

    # Synchronize the processes with the appropriate local offset. Each rank passes its running start index on
    # to the next rank in sequence; the last rank has nobody to send to.
    ranks = list(vm.ranks)
    for rank, dest_rank in zip(ranks, ranks[1:]):
        if vm.rank == rank:
            vm.comm.send(start_index + offset, dest=dest_rank, tag=tag_cache_create)
        elif vm.rank == dest_rank:
            start_index = vm.comm.recv(source=rank, tag=tag_cache_create)
    vm.barrier()

    # Find any missing local coordinate indices that are not mapped by the local cache. Positions come from
    # iterating ``cindex.flat``, so all element access goes through ``.flat`` as well. Indexing the arrays
    # directly with a flat position is only correct for one-dimensional input.
    is_missing_indices = []
    for idx, to_reindex in enumerate(cindex.flat):
        try:
            local_new_cindex = cache[to_reindex]
        except KeyError:
            is_missing_indices.append(idx)
        else:
            new_cindex.flat[idx] = local_new_cindex + start_index
    is_missing = len(is_missing_indices) > 0

    # Check if there are any processors missing their new index values.
    is_missing_global = vm.gather(is_missing)
    if vm.rank == 0:
        is_missing_global = any(is_missing_global)
    is_missing_global = vm.bcast(is_missing_global)

    # Execute a search across the process caches for any missing coordinate index values.
    if is_missing_global:
        for rank in vm.ranks:
            is_missing_rank = vm.bcast(is_missing, root=rank)
            if not is_missing_rank:
                continue
            n_missing = vm.bcast(len(is_missing_indices), root=rank)
            if vm.rank == rank:
                # The searching rank asks every other rank about each missing value. All other ranks are
                # queried even after a hit because each of them expects exactly ``n_missing`` requests.
                for imi in is_missing_indices:
                    for subrank in vm.ranks:
                        if vm.rank == subrank:
                            continue
                        vm.comm.send(cindex.flat[imi], dest=subrank, tag=tag_cache_get_recv)
                        new_cindex_element = vm.comm.recv(source=subrank, tag=tag_cache_get_send)
                        if new_cindex_element is not None:
                            new_cindex.flat[imi] = new_cindex_element
            else:
                # Answer each request with the globally offset index, or None when the value is not cached here.
                for _ in range(n_missing):
                    curr_missing = vm.comm.recv(source=rank, tag=tag_cache_get_recv)
                    new_cindex_element = cache.get(curr_missing)
                    if new_cindex_element is not None:
                        new_cindex_element += start_index
                    vm.comm.send(new_cindex_element, dest=rank, tag=tag_cache_get_send)

    return new_cindex, u