def test_read_array_header_2_0():
    """Round-trip check: write a 2.0-format array, then parse its header back."""
    buf = BytesIO()
    data = np.ones((3, 6), dtype=float)
    format.write_array(buf, data, version=(2, 0))
    # Skip the magic string and version bytes so the stream sits right at
    # the start of the header that read_array_header_2_0 expects.
    buf.seek(format.MAGIC_LEN)
    header = format.read_array_header_2_0(buf)
    assert_(header == ((3, 6), False, float))
def _init_from_file_header(self): """Initialize the object from existing file""" self.fs.seek(self.HEADER_DATA_SIZE_OFFSET) self.shape, fortran_order, self.dtype = npformat.read_array_header_2_0( self.fs) self.header_length = self.fs.tell() if fortran_order: raise ValueError( 'Column major (Fortran-style) files are not supported. Please' 'translate if first to row major (C-style).') # Determine itemsize shape = (0, ) + self.shape[1:] self.itemsize = np.empty(shape=shape, dtype=self.dtype).itemsize
def __init__(self, filename):
    """Open ``filename`` (an ``.npz`` archive) and memory-map every member array.

    NOTE(review): this leans on zipfile/NpzFile internals (``npz.zip.fp``) and
    memmapping can only work if the members are stored uncompressed — confirm
    the archives are written with ``np.savez``, not ``savez_compressed``.
    """
    self._filename = filename
    self._data = {}  # key -> np.memmap view into the archive
    npz = np.load(filename)
    # Raw file object underlying the zip archive; every offset below is
    # relative to this single shared handle.  (``file`` shadows the old
    # Python 2 builtin — kept as-is to stay byte-identical.)
    file = npz.zip.fp
    for key in npz.files:
        filename = '{}.npy'.format(key)
        # Called for its side effect only: opening the member makes zipfile
        # read its local header, which presumably leaves ``file`` positioned
        # at the start of the member's data — hence the discarded return.
        npz.zip.open(filename)
        # Parse the npy magic and the matching header; any version other
        # than (1, 0) falls through to the 2.0 reader (NOTE(review): a
        # hypothetical 3.0 file would be misparsed here).
        version = nlf.read_magic(file)
        shape, fortran_order, dtype = nlf.read_array_header_1_0(file) if version == (1, 0) \
            else nlf.read_array_header_2_0(file)
        # After the header read, file.tell() is the offset of the raw data.
        self._data[key] = np.memmap(file, dtype=dtype, mode='r',
                                    shape=shape,
                                    order='F' if fortran_order else 'C',
                                    offset=file.tell())
def _init_from_file_header(self): """Initialize the object from an existing file.""" self.fs.seek(self.HEADER_DATA_SIZE_OFFSET) try: self.shape, fortran_order, self.dtype = \ npformat.read_array_header_2_0(self.fs) except ValueError: raise ValueError( 'Npy file {} header is not 2.0 format. You can make the ' 'conversion using elfi.store.NpyFile by passing the ' 'preloaded array as an argument.'.format(self.filename)) self.header_length = self.fs.tell() if fortran_order: raise ValueError( 'Column major (Fortran-style) files are not supported. Please' 'translate if first to row major (C-style).') # Determine itemsize shape = (0, ) + self.shape[1:] self.itemsize = np.empty(shape=shape, dtype=self.dtype).itemsize
def load(file_name, axis, mpi_comm=MPI.COMM_WORLD):
    """ Load a numpy array across parallel jobs in the MPI communicator.

    The array is sliced along the chosen dimension, with minimal bandwidth.

    Parameters
    ----------
    file_name : str
        The numpy array file to load.
    axis : int
        The axis on which to distribute the array.
    mpi_comm : mpi4py.MPI.Comm, optional
        The MPI communicator used to distribute, by default MPI.COMM_WORLD.

    Returns
    -------
    (numpy.ndarray, tuple(int))
        The distributed array, and the size of the full array.

    Raises
    ------
    ValueError
        If the numpy version used to save the file is not supported.
    NotImplementedError
        If the array is saved in Fortran order.
    """
    # Only the root rank parses the npy header; the result is broadcast so
    # that every rank avoids touching the file metadata itself.
    header = None
    if is_root_process(mpi_comm):
        with open(file_name, 'rb') as fp:
            version, _ = npformat.read_magic(fp)
            if version == 1:
                header = npformat.read_array_header_1_0(fp)
            elif version == 2:
                header = npformat.read_array_header_2_0(fp)
            else:
                raise ValueError(
                    "Invalid numpy format version: {}".format(version))
            # Append the current offset: fp.tell() is now the byte offset of
            # the raw array data, needed for the MPI file view below.
            header = *header, fp.tell()
    # Collective call — every rank must reach this point (note: non-root
    # ranks cannot see the root's ValueError above; they would hang here if
    # it fired — NOTE(review): confirm this is acceptable for this code base).
    header = mpi_comm.bcast(header, root=0)
    full_shape, fortran, dtype, header_offset = header
    if fortran:
        raise NotImplementedError(
            "Fortran-ordered (column-major) arrays are not supported")
    ndims = len(full_shape)
    # Normalize a possibly-negative axis into [0, ndims).
    axis = utils.positive_index(axis, ndims)
    # Each rank gets a contiguous bin of the distributed axis: its starting
    # index and the number of slices it owns.
    i_start, bin_size = distribute_mpi(full_shape[axis], mpi_comm)
    l_shape = list(full_shape)
    l_shape[axis] = bin_size
    l_array = np.empty(l_shape, dtype=dtype)
    # Derived MPI datatype describing this rank's slab within the full array.
    slice_type = create_slice_view(axis, bin_size, shape=full_shape,
                                   dtype=dtype)
    slice_type.Commit()
    # Extent of a single slice along ``axis`` (the committed type covers
    # ``bin_size`` of them); guard against empty bins on over-subscribed runs.
    single_slice_extent = slice_type.extent
    if bin_size != 0:
        single_slice_extent /= bin_size
    # Byte offset of this rank's first slice: past the header, then past the
    # slices owned by lower ranks.
    displacement = header_offset + i_start * single_slice_extent
    base_type = to_mpi_datatype(l_array.dtype)
    # Collective read: every rank opens the file, sets its own view, and
    # reads its slab in one Read_all.
    fh = MPI.File.Open(mpi_comm, file_name, MPI.MODE_RDONLY)
    fh.Set_view(displacement, filetype=slice_type)
    fh.Read_all([l_array, l_array.size, base_type])
    fh.Close()
    slice_type.Free()
    return l_array, full_shape