def load_npy(comm, filename, dim_data):
    """
    Load a LocalArray from a ``.npy`` file.

    Parameters
    ----------
    filename : str
        The file to read.
    dim_data : tuple of dict
        A dict for each dimension, with the data described here:
        https://github.com/enthought/distributed-array-protocol, describing
        which portions of the ``.npy`` file to load into this LocalArray, and
        with what metadata.
    comm : MPI comm object

    Returns
    -------
    result : LocalArray
        A LocalArray encapsulating the data loaded.
    """
    # TODO: validate dim_data somehow
    index = compact_indices(dim_data)
    data = np.load(filename, mmap_mode='r')
    buf = data[index].copy()
    # Apparently there isn't a clean way to close a numpy memmap; it is closed
    # when the object is garbage-collected.  This stackoverflow question claims
    # that one can close it with data._mmap.close(), but it seems risky:
    # http://stackoverflow.com/questions/6397495/unmap-of-numpy-memmap
    #data._mmap.close()
    distribution = maps.Distribution(comm=comm, dim_data=dim_data)
    return LocalArray(distribution=distribution, dtype=data.dtype, buf=buf)
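
# Hedged usage sketch for load_npy: each MPI rank builds a dim_data tuple for
# its own block of a 1-D array stored in 'data.npy' (a hypothetical file) and
# loads only that block.  The dim_data keys shown follow the block ('b')
# distribution described by the Distributed Array Protocol; adjust them to
# match your data and protocol version.
def _example_load_npy():
    from mpi4py import MPI

    comm = MPI.COMM_WORLD
    rank, nprocs = comm.Get_rank(), comm.Get_size()

    size = 1000                     # global length of the array on disk
    chunk = -(-size // nprocs)      # ceil-divide: elements per rank
    start = rank * chunk
    stop = min(start + chunk, size)

    dim_data = (
        {'dist_type': 'b',          # block distribution along axis 0
         'size': size,
         'start': start,
         'stop': stop,
         'proc_grid_rank': rank,
         'proc_grid_size': nprocs},
    )

    # Each rank reads only its own slice of the memory-mapped file.
    return load_npy(comm, 'data.npy', dim_data)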
def from_distarray(cls, comm, obj):
    """Make a LocalArray from Distributed Array Protocol data structure.

    An object that supports the Distributed Array Protocol will have a
    `__distarray__` method that returns the data structure described here:
    https://github.com/enthought/distributed-array-protocol

    Parameters
    ----------
    obj : an object with a `__distarray__` method or a dict
        If a dict, it must conform to the structure defined by the
        distributed array protocol.

    Returns
    -------
    LocalArray
        A LocalArray encapsulating the buffer of the original data.  No copy
        is made.
    """
    if isinstance(obj, Mapping):
        distbuffer = obj
    else:
        distbuffer = obj.__distarray__()
    buf = np.asarray(distbuffer['buffer'])
    dim_data = distbuffer['dim_data']
    distribution = maps.Distribution(comm=comm, dim_data=dim_data)
    return cls(distribution=distribution, buf=buf)
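
# Hedged usage sketch for from_distarray: build a plain dict that conforms to
# the Distributed Array Protocol and wrap it without copying.  Only the
# 'buffer' and 'dim_data' keys are consumed above; a full __distarray__
# structure also carries a '__version__' key.  Assumes from_distarray is
# bound as a classmethod on LocalArray.
def _example_from_distarray():
    import numpy as np
    from mpi4py import MPI

    comm = MPI.COMM_WORLD
    rank, nprocs = comm.Get_rank(), comm.Get_size()

    local_block = np.arange(10.0) + 10 * rank   # this rank's slice of the data
    distbuffer = {
        'buffer': local_block,
        'dim_data': (
            {'dist_type': 'b',
             'size': 10 * nprocs,
             'start': 10 * rank,
             'stop': 10 * (rank + 1),
             'proc_grid_rank': rank,
             'proc_grid_size': nprocs},
        ),
    }

    # The resulting LocalArray shares memory with local_block; no copy is made.
    return LocalArray.from_distarray(comm, distbuffer)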
def load_hdf5(comm, filename, dim_data, key='buffer'):
    """
    Load a LocalArray from an ``.hdf5`` file.

    Parameters
    ----------
    filename : str
        The filename to read.
    dim_data : tuple of dict
        A dict for each dimension, with the data described here:
        https://github.com/enthought/distributed-array-protocol, describing
        which portions of the HDF5 file to load into this LocalArray, and
        with what metadata.
    comm : MPI comm object
    key : str, optional
        The identifier for the dataset to load the LocalArray from (the
        default is 'buffer').

    Returns
    -------
    result : LocalArray
        A LocalArray encapsulating the data loaded.

    Notes
    -----
    For `dim_data` dimension dictionaries containing unstructured ('u')
    distribution types, the indices selected by the `'indices'` key must be
    in increasing order.  This is a limitation of h5py / hdf5.
    """
    try:
        import h5py
    except ImportError:
        errmsg = "An MPI-enabled h5py must be available to use load_hdf5."
        raise ImportError(errmsg)

    # TODO: validate dim_data somehow
    index = compact_indices(dim_data)

    with h5py.File(filename, mode='r', driver='mpio', comm=comm) as fp:
        dset = fp[key]
        buf = dset[index]
        dtype = dset.dtype

    distribution = maps.Distribution(comm=comm, dim_data=dim_data)
    return LocalArray(distribution=distribution, dtype=dtype, buf=buf)
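
# Hedged usage sketch for load_hdf5: load a 2-D dataset that is
# block-distributed ('b') along axis 0 and undistributed ('n') along axis 1.
# Assumes a hypothetical 'data.hdf5' file whose 'buffer' dataset has shape
# (nrows, ncols), and an MPI-enabled h5py build.
def _example_load_hdf5():
    from mpi4py import MPI

    comm = MPI.COMM_WORLD
    rank, nprocs = comm.Get_rank(), comm.Get_size()

    nrows, ncols = 1000, 8
    chunk = -(-nrows // nprocs)
    start, stop = rank * chunk, min((rank + 1) * chunk, nrows)

    dim_data = (
        {'dist_type': 'b', 'size': nrows, 'start': start, 'stop': stop,
         'proc_grid_rank': rank, 'proc_grid_size': nprocs},
        {'dist_type': 'n', 'size': ncols},
    )

    return load_hdf5(comm, 'data.hdf5', dim_data, key='buffer')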
def local_reduction(out_comm, reducer, larr, ddpr, dtype, axes):
    """
    Entry point for reductions on local arrays.

    Parameters
    ----------
    out_comm : MPI Comm instance
        The MPI communicator for the result of the reduction.  Is equal to
        MPI.COMM_NULL when this rank is not part of the output communicator.
    reducer : callable
        Performs the core reduction operation.
    larr : LocalArray
        Input.  Defined for all ranks.
    ddpr : sequence of dim-data dictionaries
    dtype : NumPy dtype
        The dtype of the reduction output.
    axes : sequence of ints or None

    Returns
    -------
    LocalArray or None
        When out_comm == MPI.COMM_NULL, returns None.  Otherwise, returns the
        LocalArray section of the reduction result.
    """
    if out_comm == MPI.COMM_NULL:
        out = None
    else:
        dim_data = ddpr[out_comm.Get_rank()] if ddpr else ()
        dist = maps.Distribution(comm=out_comm, dim_data=dim_data)
        out = empty(dist, dtype)

    remaining_dims = [False] * larr.ndim
    for axis in axes:
        remaining_dims[axis] = True
    reduce_comm = larr.comm.Sub(remaining_dims)

    return reducer(reduce_comm, larr, out, axes, dtype)
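
# Hedged sketch of a `reducer` callable as consumed by local_reduction above:
# a sum reduction that first reduces this rank's block with NumPy and then
# combines the partial results across reduce_comm with MPI.  The actual
# reducer contract is defined elsewhere in this codebase; this only mirrors
# the call signature reducer(reduce_comm, larr, out, axes, dtype) and assumes
# LocalArray exposes its local data as `ndarray`.
def _example_sum_reducer(reduce_comm, larr, out, axes, dtype):
    import numpy as np
    from mpi4py import MPI

    # Reduce this rank's local block over the requested axes.
    local_partial = np.ascontiguousarray(
        larr.ndarray.sum(axis=tuple(axes), dtype=dtype))
    if out is None:
        # This rank is not in the output communicator: contribute and return.
        reduce_comm.Reduce(local_partial, None, op=MPI.SUM, root=0)
        return None
    # Rank 0 of each reduce_comm receives the combined result for its section.
    reduce_comm.Reduce(local_partial, out.ndarray, op=MPI.SUM, root=0)
    return out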