Example #1
0
    def _gc_iter_dst_grid_slices_(grid_chunker):
        """
        Yield one boolean selection array per destination chunk.

        The unique center ``y`` values of the destination grid's point abstraction are collected on rank 0, sorted,
        and split into ``grid_chunker.nchunks_dst[0]`` groups. Each group is broadcast to all ranks, and each rank
        yields a boolean array (same shape as its local center ``y`` values) that is ``True`` wherever the local value
        equals one of the group's values.

        :param grid_chunker: Object providing ``dst_grid`` and ``nchunks_dst``.
        :return: Generator of boolean :class:`numpy.ndarray` selections.
        """
        # TODO: This method uses some global gathers which is not ideal.
        # Only the center (point) coordinates drive the destination splitting.
        point_abstraction = grid_chunker.dst_grid.abstractions_available['point']
        lat_values = point_abstraction.y.get_value()

        # Splitting on the unique center values keeps the cells of a chunk contiguous in that coordinate. Splitting the
        # raw points instead could scatter a chunk's points, which gives poor spatial coverage of the source grid.
        gathered_unique = vm.gather(create_unique_global_array(lat_values))
        if vm.rank != 0:
            # Placeholders only; the real pieces arrive through the broadcast below.
            pieces = [None] * grid_chunker.nchunks_dst[0]
        else:
            sorted_unique = hgather(gathered_unique)
            sorted_unique.sort()
            pieces = np.array_split(sorted_unique, grid_chunker.nchunks_dst[0])

        for piece in pieces:
            # The broadcast happens lazily, once per chunk requested from the generator.
            piece = vm.bcast(piece)
            mask = np.zeros_like(lat_values, dtype=bool)
            for unique_lat in piece.flat:
                mask = np.logical_or(mask, lat_values == unique_lat)
            yield mask
Example #2
0
def reduce_reindex_coordinate_index(cindex, start_index=0):
    """
    Re-index the subset of global coordinate indices held by ``cindex``.

    Re-indexing begins at ``start_index`` (``0`` or ``1``). Masks are not respected.

    Returns a two-element tuple:

     * First element --> A :class:`numpy.ndarray` holding the new indexing. It has the shape of ``cindex`` after
       :func:`numpy.atleast_1d` has been applied.
     * Second element --> A :class:`numpy.ndarray` containing the unique indices that may be used to reduce an external
       coordinate storage variable or array.

    :param cindex: A variable containing coordinate index integer values. This variable may be distributed. This may
     also be a NumPy array.
    :type cindex: :class:`~ocgis.Variable` | :class:`~numpy.ndarray`
    :param int start_index: The first index to use for the re-indexing of ``cindex``. This may be ``0`` or ``1``.
    :rtype: tuple
    """

    # Unwrap a variable into its array value; anything lacking ``get_value`` is taken to be an array already.
    try:
        cindex = cindex.get_value()
    except AttributeError:
        pass

    # The algorithm works on a flat copy. Remember the shape so the result can be restored on the way out.
    cindex = np.atleast_1d(cindex)
    original_shape = cindex.shape
    cindex = cindex.flatten()

    # Unique coordinate index values as provided to this rank.
    u = np.array(create_unique_global_array(cindex))

    # Every rank uses the data type of rank 0's unique array for the new index.
    lrank = vm.rank
    dtype = vm.bcast(u.dtype if lrank == 0 else None)

    # Whether this rank holds any unique values at all.
    unique_count = len(u)
    has_u = unique_count > 0

    # Build the new index values from a distributed dimension sized by the local unique count.
    new_u_dimension = create_distributed_dimension(unique_count, name='__new_u_dimension__')
    new_u = arange_from_dimension(new_u_dimension, start=start_index, dtype=dtype)

    # Old index value -> new index value, used to remap the old coordinate index.
    uidx = dict(zip(u, new_u)) if has_u else None

    vm.barrier()

    # The (min, max) of the local unique values is a cheap pre-filter for finding ranks that may hold a needed value.
    # Every rank receives the full list of bounds.
    local_bounds = (min(u), max(u)) if has_u else None
    lb_global = vm.bcast(vm.gather(local_bounds))

    # Another rank is of interest when at least one local index falls inside that rank's unique-value bounds.
    overlaps = []
    for rank, lb in enumerate(lb_global):
        if rank == lrank or lb is None:
            continue
        lower, upper = lb
        inside = np.logical_and(cindex <= upper, cindex >= lower)
        if np.any(inside):
            overlaps.append(rank)

    # Share the overlap lists so each rank can work out which ranks will be asking it for data.
    global_overlaps = vm.bcast(vm.gather(overlaps))
    destinations = [asker for asker, wanted in enumerate(global_overlaps) if vm.rank in wanted]

    # MPI communication tags used in the algorithm.
    tag_child_finished = MPITag.REDUCE_REINDEX_CHILD_FINISHED
    tag_found = MPITag.REDUCE_REINDEX_FOUND
    tag_search = MPITag.REDUCE_REINDEX_SEARCH
    tag_success = MPITag.REDUCE_REINDEX_SUCCESS

    # Output buffer for the new coordinate index; presumably filled in place by the run_rr_* helpers.
    new_cindex = np.empty_like(cindex)

    # The root and non-root ranks run different sides of the exchange.
    if lrank != 0:
        run_rr_nonroot(new_cindex, cindex, uidx, destinations, has_u, overlaps, tag_child_finished, tag_found,
                       tag_search, tag_success)
    else:
        run_rr_root(new_cindex, cindex, uidx, destinations, tag_child_finished, tag_found, tag_search, tag_success)

    # Restore the shape recorded before flattening.
    new_cindex = new_cindex.reshape(*original_shape)

    vm.barrier()

    return new_cindex, u
Example #3
0
def reduce_reindex_coordinate_variables(cindex, start_index=0):
    """
    Reindex a subset of global coordinate indices contained in the ``cindex`` variable.

    Each rank numbers the unique values it receives from ``create_unique_global_array`` sequentially. The numbering is
    made continuous across ranks by passing a running offset from each rank to the next. Local index values that are
    not in the rank's own cache are resolved by querying the caches of the other ranks.

    The starting index value (``0`` or ``1``) is set by ``start_index`` for the re-indexing procedure.

    Function will not respect masks.

    The function returns a two-element tuple:

     * First element --> A :class:`numpy.ndarray` with the same dimension as ``cindex`` containing the new indexing.
     * Second element --> A :class:`numpy.ndarray` containing the unique indices that may be used to reduce an external
       coordinate storage variable or array.

    :param cindex: A variable containing coordinate index integer values. This variable may be distributed. This may
     also be a NumPy array.
    :type cindex: :class:`~ocgis.Variable` | :class:`~numpy.ndarray`
    :param int start_index: The first index to use for the re-indexing of ``cindex``. This may be ``0`` or ``1``.
    :rtype: tuple
    """

    # Get the coordinate index values as a NumPy array.
    try:
        cindex = cindex.get_value()
    except AttributeError:
        # Assume this is already a NumPy array.
        pass

    # Create the unique coordinate index array.
    u = np.array(create_unique_global_array(cindex))

    # Holds re-indexed values.
    new_cindex = np.empty_like(cindex)

    # Caches the local re-indexing for the process: old index value -> zero-based local position. A value is numbered
    # the first time it is seen, so the next free position is always the current cache size.
    cache = {}
    for to_reindex in u.flat:
        if to_reindex not in cache:
            cache[to_reindex] = len(cache)

    # MPI communication tags. The "get_recv" tag carries a lookup request to another rank and the "get_send" tag
    # carries that rank's reply.
    tag_cache_create = MPITag.REINDEX_CACHE_CREATE
    tag_cache_get_recv = MPITag.REINDEX_CACHE_GET_RECV
    tag_cache_get_send = MPITag.REINDEX_CACHE_GET_SEND

    # This is the local offset to move sequentially across processes. Cache values run from 0 to len(cache) - 1, so
    # the offset is the cache size (zero when the local cache is empty).
    offset = len(cache)

    # Synchronize the processes with the appropriate local offset. Each rank forwards its own start plus its offset to
    # the next rank, so the chain must be walked in rank order.
    for idx, rank in enumerate(vm.ranks):
        try:
            dest_rank = vm.ranks[idx + 1]
        except IndexError:
            # The last rank has no successor.
            break
        else:
            if vm.rank == rank:
                vm.comm.send(start_index + offset,
                             dest=dest_rank,
                             tag=tag_cache_create)
            elif vm.rank == dest_rank:
                offset_previous = vm.comm.recv(source=rank,
                                               tag=tag_cache_create)
                start_index = offset_previous
    vm.barrier()

    # Find any missing local coordinate indices that are not mapped by the local cache. Positions come from the flat
    # iterator, so they must be used with ``.flat`` to address the same element when the array is not one-dimensional.
    is_missing_indices = []
    for idx, to_reindex in enumerate(cindex.flat):
        try:
            local_new_cindex = cache[to_reindex]
        except KeyError:
            is_missing_indices.append(idx)
        else:
            new_cindex.flat[idx] = local_new_cindex + start_index
    is_missing = len(is_missing_indices) > 0

    # Check if there are any processors missing their new index values.
    is_missing_global = vm.gather(is_missing)
    if vm.rank == 0:
        is_missing_global = any(is_missing_global)
    is_missing_global = vm.bcast(is_missing_global)

    # Execute a search across the process caches for any missing coordinate index values.
    if is_missing_global:
        for rank in vm.ranks:
            # Every rank takes a turn as the asker; the others learn whether (and how often) they will be queried.
            is_missing_rank = vm.bcast(is_missing, root=rank)
            if is_missing_rank:
                n_missing = vm.bcast(len(is_missing_indices), root=rank)
                if vm.rank == rank:
                    for imi in is_missing_indices:
                        for subrank in vm.ranks:
                            if vm.rank != subrank:
                                vm.comm.send(cindex.flat[imi],
                                             dest=subrank,
                                             tag=tag_cache_get_recv)
                                new_cindex_element = vm.comm.recv(
                                    source=subrank, tag=tag_cache_get_send)
                                # ``None`` means the queried rank does not hold the value.
                                if new_cindex_element is not None:
                                    new_cindex.flat[imi] = new_cindex_element
                else:
                    # Answer exactly one request per missing index on the asking rank.
                    for _ in range(n_missing):
                        curr_missing = vm.comm.recv(source=rank,
                                                    tag=tag_cache_get_recv)
                        new_cindex_element = cache.get(curr_missing)
                        if new_cindex_element is not None:
                            # Shift the local position by this rank's own start to make it global.
                            new_cindex_element += start_index
                        vm.comm.send(new_cindex_element,
                                     dest=rank,
                                     tag=tag_cache_get_send)

    return new_cindex, u