def save_mpiio(comm, fn, g_kl):
    """
    Write a global two-dimensional array to a single file in the npy format
    using MPI I/O: https://docs.scipy.org/doc/numpy/neps/npy-format.html

    Arrays written with this function can be read with numpy.load.

    Parameters
    ----------
    comm
        MPI communicator. ``comm.Sub`` is called with two-entry masks, so
        this is expected to be a two-dimensional Cartesian communicator.
    fn : str
        File name.
    g_kl : array
        Portion of the array on this MPI processes. Must be
        two-dimensional.
    """
    from numpy.lib.format import dtype_to_descr, magic

    magic_str = magic(1, 0)
    local_nx, local_ny = g_kl.shape

    # One sub-communicator per grid direction: sizes and offsets are
    # accumulated independently along each of them.
    commx = comm.Sub((True, False))
    commy = comm.Sub((False, True))

    nx = np.empty_like(local_nx)
    ny = np.empty_like(local_ny)
    commx.Allreduce(np.asarray(local_nx), nx)
    commy.Allreduce(np.asarray(local_ny), ny)
    # Plain Python ints: np.asscalar no longer exists in current NumPy, and
    # the header below must contain integer literals, not NumPy reprs.
    nx, ny = nx.item(), ny.item()

    arr_dict_str = str({
        'descr': dtype_to_descr(g_kl.dtype),
        'fortran_order': False,
        'shape': (nx, ny)
    })
    # Pad with spaces so that magic + 2-byte length + dictionary + newline
    # is a multiple of 16 bytes.
    padding = -(len(arr_dict_str) + len(magic_str) + 3) % 16
    arr_dict_str += ' ' * padding + '\n'
    # The npy format stores the header length as a little-endian uint16.
    header = (magic_str
              + np.array(len(arr_dict_str), dtype='<u2').tobytes()
              + arr_dict_str.encode('latin-1'))
    header_len = len(header)

    # Exscan leaves the receive buffer untouched on the first rank, hence
    # the zero initialisation. The x offset is counted in whole global rows.
    offsetx = np.zeros_like(local_nx)
    commx.Exscan(np.asarray(ny * local_nx), offsetx)
    offsety = np.zeros_like(local_ny)
    commy.Exscan(np.asarray(local_ny), offsety)

    file = MPI.File.Open(comm, fn, MPI.MODE_CREATE | MPI.MODE_WRONLY)
    # The file is opened on `comm`, so the header must be written by rank 0
    # of `comm` (COMM_WORLD rank 0 need not be a member of `comm`).
    if comm.Get_rank() == 0:
        file.Write(header)

    mpitype = MPI._typedict[g_kl.dtype.char]
    # local_nx blocks of local_ny contiguous elements, one global row (ny
    # elements) apart.
    filetype = mpitype.Create_vector(local_nx, local_ny, ny)
    filetype.Commit()
    file.Set_view(
        header_len + (int(offsety) + int(offsetx)) * mpitype.Get_size(),
        filetype=filetype)
    # MPI needs a C-contiguous buffer; this copies only when necessary.
    file.Write_all(np.ascontiguousarray(g_kl))
    filetype.Free()
    file.Close()
    commx.Free()
    commy.Free()
def save_mpiio(self, file_name: str, vec: np.ndarray) -> None:
    """
    Write a global two-dimensional array to a single file in the npy format
    using MPI I/O.

    Arrays written with this function can be read with numpy.load.

    Args:
        file_name (str): File name.
        vec (np.ndarray): Portion of the array on this MPI processes. This
            needs to be a two-dimensional array.
    """
    magic_str = magic(1, 0)
    x_size, y_size = vec.shape

    # One sub-communicator per direction of self.rank_grid: sizes and
    # offsets are accumulated independently along each of them.
    commx = self.rank_grid.Sub((True, False))
    commy = self.rank_grid.Sub((False, True))

    vx, vy = np.empty_like(x_size), np.empty_like(y_size)
    commx.Allreduce(np.asarray(x_size), vx)
    commy.Allreduce(np.asarray(y_size), vy)
    # Plain Python ints: np.asscalar no longer exists in current NumPy, and
    # the header below must contain integer literals, not NumPy reprs.
    vx, vy = vx.item(), vy.item()

    arr_dict_str = str({
        'descr': dtype_to_descr(vec.dtype),
        'fortran_order': False,
        'shape': (vx, vy)
    })
    # Pad with spaces so that magic + 2-byte length + dictionary + newline
    # is a multiple of 16 bytes.
    padding = -(len(arr_dict_str) + len(magic_str) + 3) % 16
    arr_dict_str += ' ' * padding + '\n'
    # The npy format stores the header length as a little-endian uint16.
    header = (magic_str
              + np.array(len(arr_dict_str), dtype='<u2').tobytes()
              + arr_dict_str.encode('latin-1'))
    header_len = len(header)

    # Exscan leaves the receive buffer untouched on the first rank, hence
    # the zero initialisation. The x offset is counted in whole global rows.
    x_offset = np.zeros_like(x_size)
    commx.Exscan(np.asarray(vy * x_size), x_offset)
    y_offset = np.zeros_like(y_size)
    commy.Exscan(np.asarray(y_size), y_offset)

    file = MPI.File.Open(self.rank_grid, file_name,
                         MPI.MODE_CREATE | MPI.MODE_WRONLY)
    # NOTE(review): assumes self.rank is this process's rank within
    # self.rank_grid, so exactly one process writes the header - confirm.
    if self.rank == 0:
        file.Write(header)

    mpitype = MPI._typedict[vec.dtype.char]
    # x_size blocks of y_size contiguous elements, one global row (vy
    # elements) apart.
    filetype = mpitype.Create_vector(x_size, y_size, vy)
    filetype.Commit()
    file.Set_view(
        header_len + (int(y_offset) + int(x_offset)) * mpitype.Get_size(),
        filetype=filetype)
    # MPI needs a C-contiguous buffer; this copies only when necessary.
    file.Write_all(np.ascontiguousarray(vec))
    filetype.Free()
    file.Close()
    commx.Free()
    commy.Free()
def get_header(files, axis=0):
    """get header for concatenated file

    Parameters
    ----------
    files : path-like
        npy files to concatenate
    axis : int, default=0
        axis to concatenate along; negative values count from the last axis

    Returns
    -------
    dict
        datatype descr, fortran order and shape, for concatenated file

    Raises
    ------
    ValueError
        If ``files`` is empty, or if the files disagree in dtype, memory
        order, number of dimensions, or shape along any axis other than
        ``axis``.
    IndexError
        If ``axis`` is out of bounds for the dimensionality of the files.
    """
    dtypes, shapes, forders = [], [], []
    for path in files:
        # Memory-map so that only the header has to be touched.
        arr = np.load(path, mmap_mode="r")
        dtypes.append(arr.dtype)
        shapes.append(arr.shape)
        # 1-D arrays and arrays with length-1 axes are both C- and
        # F-contiguous. Test C first so they count as C order and do not
        # clash with genuinely C-ordered files.
        forders.append(
            arr.flags["F_CONTIGUOUS"] and not arr.flags["C_CONTIGUOUS"])
        del arr  # release the mapping before opening the next file

    if not shapes:
        raise ValueError("No files to concatenate")

    dtype = dtypes[0]
    if any(other != dtype for other in dtypes):
        raise ValueError("All files must have the same dtype")

    forder = forders[0]
    if any(other != forder for other in forders):
        raise ValueError("All files must have the same fortran order")

    reference = shapes[0]
    ndims = len(reference)
    if any(len(shape) != ndims for shape in shapes):
        raise ValueError("All files must have the same number of dimensions")

    if not -ndims <= axis < ndims:
        raise IndexError(
            "axis {} is out of bounds for arrays of dimension {}".format(
                axis, ndims))
    # Normalise so that the "skip the concatenation axis" test below also
    # works for negative axes.
    axis %= ndims

    for shape in shapes:
        if any(shape[i] != reference[i] for i in range(ndims) if i != axis):
            raise ValueError(
                "All files must have the same shape along all axes except "
                "the concatenation axis")

    shape = list(reference)
    shape[axis] = sum(shape_[axis] for shape_ in shapes)

    header = {
        "descr": fmt.dtype_to_descr(dtype),
        "fortran_order": forder,
        "shape": tuple(shape),
    }
    return header
def _npy_header(shape, dtype, order='C'):  # pragma: no cover
    """Return the npy header dictionary for the given shape, dtype and order.

    The result has the keys 'shape', 'fortran_order' and 'descr'.
    """
    # Only an explicit 'F' is flagged as Fortran order. 'C' and any other
    # value (non-contiguous data, which has to be made C-contiguous before
    # it is written) are described as C order. 'C' takes precedence because
    # a 1-D array is both C- and F-contiguous.
    return {
        'shape': shape,
        'fortran_order': order == 'F',
        'descr': dtype_to_descr(dtype),
    }
def pack_header_data(shape, fortran_order, dtype):
    """Validate the arguments and collect them in an npy header dictionary.

    The checks are exact-type checks on purpose (no subclasses, no NumPy
    integers), because the header is later serialised via repr().

    Raises TypeError if `shape` is not a tuple of ints or `fortran_order`
    is not a bool.
    """
    if type(shape) is not tuple or any(type(dim) is not int for dim in shape):
        raise TypeError("`shape` must me a tuple of intergers.")
    if type(fortran_order) is not bool:
        raise TypeError("`fortran_order` must be boolian.")

    return {
        'shape': shape,
        'fortran_order': fortran_order,
        'descr': npfor.dtype_to_descr(np.dtype(dtype)),
    }
def _prepare_header_data(self):
    """Build the version 2.0 npy header and store it on the instance.

    The header describes self.shape, self.fortran_order and self.dtype and
    is sized to exactly self.header_length bytes. The result is stored in
    self._header_bytes_to_write.

    Raises OverflowError if the header does not fit into
    self.header_length bytes.
    """
    import struct

    # Make header data
    d = {
        'shape': self.shape,
        'fortran_order': self.fortran_order,
        'descr': npformat.dtype_to_descr(self.dtype)
    }
    h_bytes = io.BytesIO()
    npformat.write_array_header_2_0(h_bytes, d)
    header = h_bytes.getvalue()

    fill_len = self.header_length - len(header)
    if fill_len < 0:
        raise OverflowError(
            "File {} cannot be appended. The header is too short.".format(
                self.filename))
    elif fill_len > 0:
        # Version 2.0 layout: 8 bytes of magic/version, a little-endian
        # uint32 HEADER_LEN, then the dictionary padded with spaces and
        # terminated by a newline. The extra padding has to go in front of
        # that newline and be counted in HEADER_LEN; otherwise readers
        # treat it as the start of the array data.
        prefix_len = 8 + struct.calcsize('<I')
        dict_len = len(header) - prefix_len + fill_len
        header = (header[:8]
                  + struct.pack('<I', dict_len)
                  + header[prefix_len:-1]
                  + b'\x20' * fill_len
                  + header[-1:])

    self._header_bytes_to_write = header
def save(file_name, array, axis, full_shape=None, mpi_comm=MPI.COMM_WORLD):
    """
    Save a numpy array from parallel jobs in the MPI communicator.
    The array is gathered along the chosen dimension.

    Parameters
    ----------
    file_name : str
        The numpy array file to write.
    array : numpy.ndarray
        The distributed array.
    axis : int
        The axis on which to distribute the array.
    full_shape : tuple(int), optional
        The size of the full array, by default None. When None it is
        obtained from ``gather_full_shape``.
    mpi_comm : mpi4py.MPI.Comm, optional
        The MPI communicator used to distribute, by default MPI.COMM_WORLD.
    """
    if full_shape is None:
        full_shape = gather_full_shape(array, axis, mpi_comm)

    axis = utils.positive_index(axis, len(full_shape))

    # Only the root process writes the header; everybody else learns where
    # the data section starts through the broadcast below.
    header_offset = None
    if is_root_process(mpi_comm):
        header_dict = {
            'shape': full_shape,
            'fortran_order': False,
            'descr': npformat.dtype_to_descr(array.dtype)
        }

        with open(file_name, 'wb') as fp:
            try:
                npformat.write_array_header_1_0(fp, header_dict)
            except ValueError:
                # Header does not fit the version 1.0 length field.
                npformat.write_array_header_2_0(fp, header_dict)

            header_offset = fp.tell()
    header_offset = mpi_comm.bcast(header_offset, root=0)

    i_start, bin_size = distribute_mpi(full_shape[axis], mpi_comm)

    slice_type = create_slice_view(axis, bin_size, shape=full_shape,
                                   dtype=array.dtype)
    slice_type.Commit()

    single_slice_extent = slice_type.extent
    if bin_size != 0:
        # Integer division: the displacement handed to Set_view is a byte
        # offset and must stay an integer.
        single_slice_extent //= bin_size

    displacement = header_offset + i_start * single_slice_extent
    base_type = to_mpi_datatype(array.dtype)

    fh = MPI.File.Open(mpi_comm, file_name, MPI.MODE_WRONLY | MPI.MODE_APPEND)
    fh.Set_view(displacement, filetype=slice_type)

    fh.Write_all([array, array.size, base_type])
    fh.Close()
    slice_type.Free()
def open_memmap(filename, mode='r+', dtype=None, shape=None,
                fortran_order=False, version=(1,0), offset=0):
    """
    Open a .npy file as a memory-mapped array, with offset argument.

    This may be used to read an existing file or create a new one.

    :param str filename: The name of the file on disk. This may not be a
        file-like object.
    :param str mode: The mode to open the file with. In addition to the
        standard file modes, 'c' is also accepted to mean "copy on write".
        See `numpy.memmap` for the available mode strings.
    :param dtype dtype: The data type of the array if we are creating a
        new file in "write" mode.
    :param tuple shape: The shape of the array if we are creating a new
        file in "write" mode. Length of the (contiguous) slice along the
        first dimension if opening an existing file.
    :param bool fortran_order: Whether the array should be
        Fortran-contiguous (True) or C-contiguous (False) if we are
        creating a new file in "write" mode.
    :param tuple version: If the mode is a "write" mode, then this is the
        version (major, minor) of the file format used to create the file.
    :param int offset: Number of elements to skip along the first
        dimension.
    :return numpy.memmap: The memory-mapped array.

    Raises:

    * :exc:`ValueError` if the data or the mode is invalid
    * :exc:`IOError` if the file is not found or cannot be opened
      correctly.

    .. seealso:: :func:`numpy.memmap`
    """
    import io

    # `basestring` only exists on Python 2 (or via a compatibility import).
    try:
        string_types = basestring
    except NameError:
        string_types = str
    if not isinstance(filename, string_types):
        raise ValueError("Filename must be a string. Memmap cannot use"
                         " existing file handles.")

    if 'w' in mode:
        if offset != 0:
            raise ValueError("Cannot specify offset when creating memmap")
        # We are creating the file, not reading it.
        # Check if we ought to create the file.
        if version != (1, 0):
            msg = "only support version (1,0) of file format, not %r"
            raise ValueError(msg % (version,))
        # Ensure that the given dtype is an authentic dtype object rather
        # than just something that can be interpreted as a dtype object.
        dtype = np.dtype(dtype)
        if dtype.hasobject:
            msg = "Array can't be memory-mapped: Python objects in dtype."
            raise ValueError(msg)
        d = dict(
            descr=dtype_to_descr(dtype),
            fortran_order=fortran_order,
            shape=shape,
        )
        # Build the header in memory first. NumPy >= 1.9 already emits the
        # magic string from write_array_header_1_0; older releases do not.
        # Prepend it only when missing so it is never written twice.
        buf = io.BytesIO()
        write_array_header_1_0(buf, d)
        header = buf.getvalue()
        prefix = magic(*version)
        if not header.startswith(prefix):
            header = prefix + header
        # If we got here, then it should be safe to create the file.
        with open(filename, mode + 'b') as fp:
            fp.write(header)
            # The array data starts right behind the header.
            offset_bytes = fp.tell()
    else:
        # Read the header of the file first.
        with open(filename, 'rb') as fp:
            version = read_magic(fp)
            if version != (1, 0):
                msg = "only support version (1,0) of file format, not %r"
                raise ValueError(msg % (version,))
            fullshape, fortran_order, dtype = read_array_header_1_0(fp)

            if shape:
                length = np.atleast_1d(shape)
                if length.ndim != 1 or length.size != 1:
                    raise ValueError(
                        "Specify shape along first dimension only")
                # A plain int, so the shape tuple holds no arrays.
                length = int(length[0])
            else:
                length = fullshape[0] - offset
            shape = (length,) + fullshape[1:]

            if dtype.hasobject:
                msg = "Array can't be memory-mapped: Python objects in dtype."
                raise ValueError(msg)

            offset_items = offset * np.prod(fullshape[1:], dtype=int)
            offset_bytes = int(fp.tell() + offset_items * dtype.itemsize)

    order = 'F' if fortran_order else 'C'

    # We need to change a write-only mode to a read-write mode since we've
    # already written data to the file.
    if mode == 'w+':
        mode = 'r+'

    marray = np.memmap(filename, dtype=dtype, shape=shape, order=order,
                       mode=mode, offset=offset_bytes)

    return marray
def save_npy(fn, data, subdomain_locations=None, nb_grid_pts=None,
             comm=MPI.COMM_WORLD):
    """
    Write a one- or two-dimensional array that is distributed over the
    processes of `comm` to a single npy file using MPI I/O.

    Parameters
    ----------
    fn : file name
    data : numpy array : data owned by the processor
    subdomain_locations : index of the first element of data within the
        global data (default: all zeros). For one-dimensional data a
        scalar.
    nb_grid_pts : nb_grid_pts of the global data (default: the shape of
        `data`). For one-dimensional data a scalar.
    comm : MPI communicator

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If `data` is neither one- nor two-dimensional.
    """
    data = np.asarray(data)
    ndims = len(data.shape)
    if ndims == 1:
        # Treat 1-D data as a single column.
        data = data.reshape((-1, 1))
    if data.ndim != 2:
        raise ValueError(
            "save_npy only supports one- or two-dimensional arrays")

    if subdomain_locations is None:
        subdomain_locations = (0, 0)
    elif ndims == 1:
        # Accept a scalar as well as a length-1 sequence.
        subdomain_locations = (np.ravel(subdomain_locations)[0], 0)
    nb_subdomain_grid_pts = data.shape

    if nb_grid_pts is None:
        nb_grid_pts = nb_subdomain_grid_pts
    elif ndims == 1:
        nb_grid_pts = (np.ravel(nb_grid_pts)[0], 1)

    # Plain Python ints in tuples: the header is produced with str(), and
    # NumPy integer reprs or list literals would make it unparsable.
    subdomain_locations = tuple(int(loc) for loc in subdomain_locations)
    nb_grid_pts = tuple(int(nb) for nb in nb_grid_pts)

    fortran_order = bool(np.isfortran(data))

    from numpy.lib.format import dtype_to_descr, magic
    magic_str = magic(1, 0)

    arr_dict_str = str({
        'descr': dtype_to_descr(data.dtype),
        'fortran_order': fortran_order,
        'shape': (nb_grid_pts[0], ) if ndims == 1 else nb_grid_pts
    })

    # Pad with spaces so that magic + 2-byte length + dictionary + newline
    # is a multiple of 16 bytes.
    padding = -(len(arr_dict_str) + len(magic_str) + 3) % 16
    arr_dict_str += ' ' * padding + '\n'
    # The npy format stores the header length as a little-endian uint16.
    header = (magic_str
              + np.array(len(arr_dict_str), dtype='<u2').tobytes()
              + arr_dict_str.encode('latin-1'))
    header_len = len(header)

    file = MPI.File.Open(comm, fn, MPI.MODE_CREATE | MPI.MODE_WRONLY)
    if comm.Get_rank() == 0:
        file.Write(header)

    if fortran_order:
        # the returned array will be in fortran_order.
        # the data is loaded in C_contiguous array but in a transposed
        # manner. data.transpose() is called which swaps the shapes back
        # again and toggles C-order to F-order
        ix = 1
        iy = 0
    else:
        ix = 0
        iy = 1

    mpitype = MPI._typedict[data.dtype.char]
    filetype = mpitype.Create_vector(
        # number of blocks : length of data in the non-contiguous direction
        nb_subdomain_grid_pts[ix],
        # length of block : length of data in contiguous direction
        nb_subdomain_grid_pts[iy],
        # stepsize: the data is contiguous in y direction, two matrix
        # elements with same x position are separated by ny in memory
        nb_grid_pts[iy]
    )  # create a type, see MPI_TYPE_VECTOR

    filetype.Commit()
    file.Set_view(
        header_len
        + (subdomain_locations[ix] * nb_grid_pts[iy]
           + subdomain_locations[iy]) * mpitype.Get_size(),
        filetype=filetype)
    if fortran_order:
        data = data.transpose()
    # MPI needs a C-contiguous buffer; this copies only when necessary.
    file.Write_all(np.ascontiguousarray(data))
    filetype.Free()
    file.Close()