Example #1
def load_npy(comm, filename, dim_data):
    """
    Load a LocalArray from a ``.npy`` file.

    Parameters
    ----------
    filename : str
        The file to read.
    dim_data : tuple of dict
        A dict for each dimension, with the data described here:
        https://github.com/enthought/distributed-array-protocol, describing
        which portions of the ``.npy`` file to load into this LocalArray, and
        with what metadata.
    comm : MPI comm object
        The communicator over which the array is distributed.

    Returns
    -------
    result : LocalArray
        A LocalArray encapsulating the data loaded.

    """
    #TODO: validate dim_data somehow
    index = compact_indices(dim_data)
    data = np.load(filename, mmap_mode='r')
    buf = data[index].copy()

    # Apparently there isn't a clean way to close a numpy memmap; it is closed
    # when the object is garbage-collected.  This stackoverflow question claims
    # that one can close it with data._mmap.close(), but it seems risky
    # http://stackoverflow.com/questions/6397495/unmap-of-numpy-memmap

    #data._mmap.close()
    distribution = maps.Distribution(comm=comm, dim_data=dim_data)
    return LocalArray(distribution=distribution, dtype=data.dtype, buf=buf)
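
A minimal usage sketch (not from the original source): each rank loads one contiguous block of a 1-D array with mpi4py. The file name data.npy and the dim-data keys ('dist_type', 'size', 'start', 'stop', 'proc_grid_size', 'proc_grid_rank') follow my reading of the Distributed Array Protocol spec linked above and should be checked against it.

from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nprocs = comm.Get_size()

# Global 1-D array of length 4 * nprocs; rank 0 writes the file so that
# every rank can memory-map the same data.
size = 4 * nprocs
if rank == 0:
    np.save('data.npy', np.arange(size))
comm.Barrier()

# Block ('b') distribution: each rank owns one contiguous slice.
chunk = size // nprocs
dim_data = ({
    'dist_type': 'b',
    'size': size,
    'start': rank * chunk,
    'stop': (rank + 1) * chunk,
    'proc_grid_size': nprocs,
    'proc_grid_rank': rank,
},)

larr = load_npy(comm, 'data.npy', dim_data)   # holds only this rank's block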
Example #2
    @classmethod
    def from_distarray(cls, comm, obj):
        """Make a LocalArray from Distributed Array Protocol data structure.

        An object that supports the Distributed Array Protocol will have
        a `__distarray__` method that returns the data structure
        described here:

        https://github.com/enthought/distributed-array-protocol

        Parameters
        ----------
        comm : MPI comm object
            The communicator over which the resulting LocalArray is
            distributed.
        obj : an object with a `__distarray__` method or a dict
            If a dict, it must conform to the structure defined by the
            distributed array protocol.

        Returns
        -------
        LocalArray
            A LocalArray encapsulating the buffer of the original data.
            No copy is made.
        """
        if isinstance(obj, Mapping):
            distbuffer = obj
        else:
            distbuffer = obj.__distarray__()
        buf = np.asarray(distbuffer['buffer'])
        dim_data = distbuffer['dim_data']

        distribution = maps.Distribution(comm=comm, dim_data=dim_data)
        return cls(distribution=distribution, buf=buf)
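
A sketch of feeding from_distarray a dict that conforms to the protocol (the method accepts either such a dict or any object whose __distarray__ returns one). The '__version__' entry and its value are assumptions based on the linked spec; the method shown above only reads 'buffer' and 'dim_data'.

from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nprocs = comm.Get_size()

local_block = np.arange(rank * 10, (rank + 1) * 10)

# A protocol-conforming dict: the local buffer plus one dim-data dict per
# dimension ('b' = block distribution).
distbuffer = {
    '__version__': '0.10.0',   # illustrative; see the spec for real versions
    'buffer': local_block,
    'dim_data': ({
        'dist_type': 'b',
        'size': 10 * nprocs,
        'start': rank * 10,
        'stop': (rank + 1) * 10,
        'proc_grid_size': nprocs,
        'proc_grid_rank': rank,
    },),
}

# No copy: the resulting LocalArray wraps local_block's buffer directly.
larr = LocalArray.from_distarray(comm, distbuffer)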
Example #3
def load_hdf5(comm, filename, dim_data, key='buffer'):
    """
    Load a LocalArray from an ``.hdf5`` file.

    Parameters
    ----------
    filename : str
        The filename to read.
    dim_data : tuple of dict
        A dict for each dimension, with the data described here:
        https://github.com/enthought/distributed-array-protocol, describing
        which portions of the HDF5 file to load into this LocalArray, and with
        what metadata.
    comm : MPI comm object
        The communicator over which the array is distributed.
    key : str, optional
        The identifier for the dataset to load the LocalArray from (the default
        is 'buffer').

    Returns
    -------
    result : LocalArray
        A LocalArray encapsulating the data loaded.

    Notes
    -----
    For `dim_data` dimension dictionaries containing unstructured ('u')
    distribution types, the indices selected by the `'indices'` key must be in
    increasing order.  This is a limitation of h5py / hdf5.

    """
    try:
        import h5py
    except ImportError:
        errmsg = "An MPI-enabled h5py must be available to use load_hdf5."
        raise ImportError(errmsg)

    #TODO: validate dim_data somehow
    index = compact_indices(dim_data)

    with h5py.File(filename, mode='r', driver='mpio', comm=comm) as fp:
        dset = fp[key]
        buf = dset[index]
        dtype = dset.dtype

    distribution = maps.Distribution(comm=comm, dim_data=dim_data)
    return LocalArray(distribution=distribution, dtype=dtype, buf=buf)
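
A sketch that exercises the note above with an unstructured ('u') distribution: each rank picks every nprocs-th element, and the indices are produced in increasing order as h5py / HDF5 requires. The file name, the default 'buffer' dataset key, and the exact 'u'-type keys are assumptions based on the linked spec, and data.hdf5 is presumed to have been written beforehand.

from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nprocs = comm.Get_size()

# Round-robin selection from a length-20 dataset; range() already yields
# the indices in increasing order.
indices = list(range(rank, 20, nprocs))
dim_data = ({
    'dist_type': 'u',
    'size': 20,
    'indices': indices,
    'proc_grid_size': nprocs,
    'proc_grid_rank': rank,
},)

larr = load_hdf5(comm, 'data.hdf5', dim_data, key='buffer')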
Example #4
def local_reduction(out_comm, reducer, larr, ddpr, dtype, axes):
    """ Entry point for reductions on local arrays.

    Parameters
    ----------
    out_comm : MPI Comm instance
        The MPI communicator for the result of the reduction.  Equal to
        MPI.COMM_NULL when this rank is not part of the output communicator.

    reducer : callable
        Performs the core reduction operation.

    larr : LocalArray
        Input.  Defined for all ranks.

    ddpr : sequence of dim-data dictionaries
        One dim-data structure per rank of `out_comm`, describing the
        distribution of the reduction result.

    dtype : numpy dtype
        The dtype of the result.

    axes : sequence of ints or None
        The axes to reduce over.

    Returns
    -------
    LocalArray or None
        When out_comm == MPI.COMM_NULL, returns None.
        Otherwise, returns the LocalArray section of the reduction result.
    """

    if out_comm == MPI.COMM_NULL:
        out = None
    else:
        dim_data = ddpr[out_comm.Get_rank()] if ddpr else ()
        dist = maps.Distribution(comm=out_comm, dim_data=dim_data)
        out = empty(dist, dtype)

    remaining_dims = [False] * larr.ndim
    for axis in axes:
        remaining_dims[axis] = True
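    # comm.Sub keeps the Cartesian dimensions flagged True (the reduced
    # axes), so each sub-communicator groups exactly the ranks whose local
    # blocks are combined into one section of the result.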
    reduce_comm = larr.comm.Sub(remaining_dims)
    return reducer(reduce_comm, larr, out, axes, dtype)
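
The reducer argument is only described as "performs the core reduction operation"; below is a hypothetical sum-style reducer matching the call at the end of local_reduction, not distarray's actual implementation. It assumes that axes is a sequence, that the rank holding a non-None out is rank 0 of reduce_comm, and that LocalArray exposes its NumPy buffer as .ndarray.

import numpy as np
from mpi4py import MPI

def sum_reducer(reduce_comm, larr, out, axes, dtype):
    """Hypothetical reducer: local sum, then an MPI Reduce over reduce_comm."""
    # Reduce this rank's block along the requested axes.
    partial = np.asarray(larr.ndarray.sum(axis=tuple(axes)), dtype=dtype)
    if out is None:
        # Not an output rank: contribute the partial result and return None.
        reduce_comm.Reduce(partial, None, op=MPI.SUM, root=0)
        return None
    result = np.empty_like(partial)
    reduce_comm.Reduce(partial, result, op=MPI.SUM, root=0)
    out.ndarray[...] = result
    return out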