def write_header_data(fname, header_data):
    """Overwrite the beginning of an existing file with an npy 1.0 header.

    Parameters
    ----------
    fname : str
        Path of an existing file.  It is opened for in-place update, so the
        file must already exist and is not truncated.
    header_data
        Header description handed unchanged to ``write_array_header_1_0``.
        NOTE(review): presumably the usual ``descr`` / ``fortran_order`` /
        ``shape`` dictionary -- confirm against that helper.
    """
    version = (1, 0)
    # Binary mode: the magic string is raw bytes and must not go through
    # newline translation.  The context manager closes the handle even if
    # writing the header fails.
    with open(fname, 'r+b') as fp:
        fp.write(npfor.magic(*version))
        # Custom version of write_array_header, not the np.lib.format version.
        write_array_header_1_0(fp, header_data)
def read_npy(fp, prn=False):
    """Read an npy file into an array, reading the payload in chunks.

    Parameters
    ----------
    fp : str
        The file path, e.g. "c:/temp/a01.npy".
    prn : bool
        If True, print the magic string, shape, contiguity and dtype found
        in the header.

    Returns
    -------
    numpy.ndarray
        Array with the shape and dtype stored in the file.  Data stored in
        Fortran order is returned as a transposed view so that element
        ``[i, j]`` matches the array that was saved.

    Raises
    ------
    ValueError
        If the format version is neither 1.0 nor 2.0, if the dtype contains
        Python objects (those are pickled and cannot be read from raw
        bytes), or if the file ends before all array data was read.
    """
    from numpy.lib import format as format_

    frmt = """
    Magic {}
    Shape {}, C-contig {}, dtype {}
    """
    BUFFER_SIZE = 2**18  # upper bound, in bytes, for a single read call

    with open(fp, 'rb') as f:
        major, minor = format_.read_magic(f)
        mag = format_.magic(major, minor)
        # The two versions differ in the width of the header-length field,
        # so the matching header reader has to be used.
        if (major, minor) == (1, 0):
            shp, is_fortran, dt = format_.read_array_header_1_0(f)
        elif (major, minor) == (2, 0):
            shp, is_fortran, dt = format_.read_array_header_2_0(f)
        else:
            raise ValueError(
                "unsupported npy format version {}.{}".format(major, minor))
        if dt.hasobject:
            raise ValueError("cannot read object arrays from raw bytes; "
                             "use numpy.load instead")
        count = int(np.multiply.reduce(shp, dtype=np.int64))
        array = np.ndarray(count, dtype=dt)
        # A zero-sized dtype has no payload (and would divide by zero below).
        if dt.itemsize > 0:
            max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, dt.itemsize)
            for i in range(0, count, max_read_count):
                read_count = min(max_read_count, count - i)
                read_size = read_count * dt.itemsize
                data = f.read(read_size)
                if len(data) != read_size:
                    raise ValueError(
                        "EOF: reading array data, expected %d bytes got %d"
                        % (read_size, len(data)))
                array[i:i + read_count] = np.frombuffer(
                    data, dtype=dt, count=read_count)

    if is_fortran:
        # The bytes on disk are column-major: fill the reversed shape in C
        # order, then transpose to obtain the logical array.
        array.shape = shp[::-1]
        array = array.transpose()
    else:
        array.shape = shp
    if prn:
        print(dedent(frmt).format(mag, shp, (not is_fortran), dt))
    return array
def read_npy(fp, prn=False):
    """Read an npy file into an array, reading the payload in chunks.

    Parameters
    ----------
    fp : str
        The file path, e.g. "c:/temp/a01.npy".
    prn : bool
        If True, print the magic string, shape, contiguity and dtype found
        in the header.

    Returns
    -------
    numpy.ndarray
        Array with the shape and dtype stored in the file.  Data stored in
        Fortran order is returned as a transposed view so that element
        ``[i, j]`` matches the array that was saved.

    Raises
    ------
    ValueError
        If the format version is neither 1.0 nor 2.0, if the dtype contains
        Python objects (those are pickled and cannot be read from raw
        bytes), or if the file ends before all array data was read.
    """
    from numpy.lib import format as format_

    frmt = """
    Magic {}
    Shape {}, C-contig {}, dtype {}
    """
    BUFFER_SIZE = 2**18  # upper bound, in bytes, for a single read call

    with open(fp, 'rb') as f:
        major, minor = format_.read_magic(f)
        mag = format_.magic(major, minor)
        # Pick the header reader that matches the header-length field width
        # of the file's format version.
        header_readers = {
            (1, 0): format_.read_array_header_1_0,
            (2, 0): format_.read_array_header_2_0,
        }
        if (major, minor) not in header_readers:
            raise ValueError(
                "unsupported npy format version {}.{}".format(major, minor))
        shp, is_fortran, dt = header_readers[(major, minor)](f)
        if dt.hasobject:
            raise ValueError("cannot read object arrays from raw bytes; "
                             "use numpy.load instead")
        count = int(np.multiply.reduce(shp, dtype=np.int64))
        array = np.ndarray(count, dtype=dt)
        # A zero-sized dtype has no payload (and would divide by zero below).
        if dt.itemsize > 0:
            max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, dt.itemsize)
            for i in range(0, count, max_read_count):
                read_count = min(max_read_count, count - i)
                read_size = read_count * dt.itemsize
                data = f.read(read_size)
                if len(data) != read_size:
                    raise ValueError(
                        "EOF: reading array data, expected %d bytes got %d"
                        % (read_size, len(data)))
                array[i:i+read_count] = np.frombuffer(
                    data, dtype=dt, count=read_count)

    if is_fortran:
        # The bytes on disk are column-major: fill the reversed shape in C
        # order, then transpose to obtain the logical array.
        array.shape = shp[::-1]
        array = array.transpose()
    else:
        array.shape = shp
    if prn:
        print(dedent(frmt).format(mag, shp, (not is_fortran), dt))
    return array
def save_mpiio(comm, fn, g_kl):
    """
    Write a global two-dimensional array to a single file in the npy format
    using MPI I/O: https://docs.scipy.org/doc/numpy/neps/npy-format.html

    Arrays written with this function can be read with numpy.load.

    Parameters
    ----------
    comm
        MPI communicator.  ``comm.Sub`` is called with two flags, so it has
        to be a two-dimensional Cartesian communicator.
    fn : str
        File name.
    g_kl : array
        Portion of the array on this MPI processes.  Must be
        two-dimensional.
    """
    from numpy.lib.format import dtype_to_descr, magic
    magic_str = magic(1, 0)

    local_nx, local_ny = g_kl.shape
    nx = np.empty_like(local_nx)
    ny = np.empty_like(local_ny)

    commx = comm.Sub((True, False))
    commy = comm.Sub((False, True))
    try:
        # Global extents: sum of the local extents along each grid direction.
        commx.Allreduce(np.asarray(local_nx), nx)
        commy.Allreduce(np.asarray(local_ny), ny)
        # Plain Python ints so that the header contains bare numbers
        # (np.asscalar no longer exists in current NumPy releases).
        global_nx = nx.item()
        global_ny = ny.item()

        arr_dict_str = str({
            'descr': dtype_to_descr(g_kl.dtype),
            'fortran_order': False,
            'shape': (global_nx, global_ny)
        })
        # Pad so that magic + 2-byte length + header (including the final
        # newline) is a multiple of 16 bytes.
        while (len(arr_dict_str) + len(magic_str) + 2) % 16 != 15:
            arr_dict_str += ' '
        arr_dict_str += '\n'
        header_len = len(arr_dict_str) + len(magic_str) + 2

        # Exscan leaves the receive buffer of the first rank untouched,
        # hence the zero initialisation.
        offsetx = np.zeros_like(local_nx)
        commx.Exscan(np.asarray(global_ny * local_nx), offsetx)
        offsety = np.zeros_like(local_ny)
        commy.Exscan(np.asarray(local_ny), offsety)
    finally:
        commx.Free()
        commy.Free()

    fh = MPI.File.Open(comm, fn, MPI.MODE_CREATE | MPI.MODE_WRONLY)
    try:
        # The file belongs to `comm`, so the header is written by rank 0 of
        # `comm` rather than by rank 0 of COMM_WORLD.
        if comm.Get_rank() == 0:
            fh.Write(magic_str)
            # Format 1.0 stores the header length as little-endian uint16.
            fh.Write(np.array([len(arr_dict_str)], dtype='<u2'))
            fh.Write(arr_dict_str.encode('latin-1'))
        mpitype = MPI._typedict[g_kl.dtype.char]
        # local_nx blocks of local_ny items, one global row (ny items) apart.
        filetype = mpitype.Create_vector(local_nx, local_ny, global_ny)
        filetype.Commit()
        try:
            displacement = header_len + \
                (offsety.item() + offsetx.item()) * mpitype.Get_size()
            fh.Set_view(int(displacement), filetype=filetype)
            # Write_all needs a contiguous buffer; this copies only if needed.
            fh.Write_all(np.ascontiguousarray(g_kl))
        finally:
            filetype.Free()
    finally:
        fh.Close()
def save_mpiio(self, file_name: str, vec: np.ndarray) -> None:
    """
    Write a global two-dimensional array to a single file in the npy format
    using MPI I/O.

    Arrays written with this function can be read with numpy.load.

    Args:
        file_name (str): File name.
        vec (np.ndarray): Portion of the array on this MPI processes. This
            needs to be a two-dimensional array.
    """
    magic_str = magic(1, 0)
    x_size, y_size = vec.shape
    vx, vy = np.empty_like(x_size), np.empty_like(y_size)

    commx = self.rank_grid.Sub((True, False))
    commy = self.rank_grid.Sub((False, True))
    try:
        # Global extents: sum of the local extents along each grid direction.
        commx.Allreduce(np.asarray(x_size), vx)
        commy.Allreduce(np.asarray(y_size), vy)
        # Plain Python ints so that the header contains bare numbers
        # (np.asscalar no longer exists in current NumPy releases).
        global_x = vx.item()
        global_y = vy.item()

        arr_dict_str = str({
            'descr': dtype_to_descr(vec.dtype),
            'fortran_order': False,
            'shape': (global_x, global_y)
        })
        # Pad so that magic + 2-byte length + header (including the final
        # newline) is a multiple of 16 bytes.
        while (len(arr_dict_str) + len(magic_str) + 2) % 16 != 15:
            arr_dict_str += ' '
        arr_dict_str += '\n'
        header_len = len(arr_dict_str) + len(magic_str) + 2

        # Exscan leaves the receive buffer of the first rank untouched,
        # hence the zero initialisation.
        x_offset = np.zeros_like(x_size)
        commx.Exscan(np.asarray(global_y * x_size), x_offset)
        y_offset = np.zeros_like(y_size)
        commy.Exscan(np.asarray(y_size), y_offset)
    finally:
        commx.Free()
        commy.Free()

    fh = MPI.File.Open(self.rank_grid, file_name,
                       MPI.MODE_CREATE | MPI.MODE_WRONLY)
    try:
        # NOTE(review): assumes self.rank is this process's rank within
        # self.rank_grid -- confirm where the attribute is set.
        if self.rank == 0:
            fh.Write(magic_str)
            # Format 1.0 stores the header length as little-endian uint16.
            fh.Write(np.array([len(arr_dict_str)], dtype='<u2'))
            fh.Write(arr_dict_str.encode('latin-1'))

        mpitype = MPI._typedict[vec.dtype.char]
        # x_size blocks of y_size items, one global row (vy items) apart.
        filetype = mpitype.Create_vector(x_size, y_size, global_y)
        filetype.Commit()
        try:
            displacement = header_len + \
                (y_offset.item() + x_offset.item()) * mpitype.Get_size()
            fh.Set_view(int(displacement), filetype=filetype)
            # Write_all needs a contiguous buffer; this copies only if needed.
            fh.Write_all(np.ascontiguousarray(vec))
        finally:
            filetype.Free()
    finally:
        fh.Close()
def read_npy(fp, prn=False):
    """Read an npy file quickly

    fp : string
        The file path: "c:/temp/a01.npy"
    prn : boolean
        obtain full information if True

    Returns:
    --------
    The array stored in the file.  Data saved in Fortran order is returned
    as a transposed view, so indexing matches the array that was saved.

    Raises:
    -------
    ValueError if the format version is neither 1.0 nor 2.0, if the dtype
    contains Python objects, or if the file ends before all data was read.

    Requires:
    ---------
    from numpy.lib import format

    Notes:
    -------
    shortcut ... np.load("c:/temp/a01.npy")
    """
    frmt = """
    ---- npy reader ---------------------------------------------------------
    File {}
    Shape {}, C-contig {}, dtype {}
    Magic {}
    -------------------------------------------------------------------------
    """
    BUFFER_SIZE = 2**18  # upper bound, in bytes, for a single read call

    with open(fp, 'rb') as f:
        major, minor = format.read_magic(f)
        mag = format.magic(major, minor)
        # The two versions differ in the width of the header-length field,
        # so the matching header reader has to be used.
        if (major, minor) == (1, 0):
            shp, is_fortran, dt = format.read_array_header_1_0(f)
        elif (major, minor) == (2, 0):
            shp, is_fortran, dt = format.read_array_header_2_0(f)
        else:
            raise ValueError(
                "unsupported npy format version {}.{}".format(major, minor))
        if dt.hasobject:
            raise ValueError("cannot read object arrays from raw bytes; "
                             "use numpy.load instead")
        count = int(np.multiply.reduce(shp, dtype=np.int64))
        array = np.ndarray(count, dtype=dt)
        # A zero-sized dtype has no payload (and would divide by zero below).
        if dt.itemsize > 0:
            max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, dt.itemsize)
            for i in range(0, count, max_read_count):
                cnt = min(max_read_count, count - i)
                read_size = cnt * dt.itemsize
                data = f.read(read_size)
                if len(data) != read_size:
                    raise ValueError(
                        "EOF: reading array data, expected %d bytes got %d"
                        % (read_size, len(data)))
                array[i:i + cnt] = np.frombuffer(data, dtype=dt, count=cnt)

    if is_fortran:
        # The bytes on disk are column-major: fill the reversed shape in C
        # order, then transpose to obtain the logical array.
        array.shape = shp[::-1]
        array = array.transpose()
    else:
        array.shape = shp
    if prn:
        print(dedent(frmt).format(fp, shp, (not is_fortran), dt, mag))
    return array
def _read_header(self):
    """Open ``self.fn`` through MPI I/O and parse its npy header.

    On success ``self.file`` is left open, positioned just behind the
    header, and ``self.dtype``, ``self.fortran_order`` and
    ``self.nb_grid_pts`` (the ``shape`` entry of the header) are set.

    On any failure the file is closed again, ``self.file`` is reset to
    ``None`` so that no closed handle is left behind, and the original
    exception propagates with its traceback intact.

    Raises
    ------
    MPIFileTypeError
        If the file does not start with the npy magic prefix or announces
        a format version other than 1.0 or 2.0.
    """
    self.file = None
    try:
        # magic(1, 0) is only used for its length: the prefix plus the two
        # version bytes.
        magic_len = len(magic(1, 0))
        self.file = MPI.File.Open(self.comm, self.fn, MPI.MODE_RDONLY)
        magic_str = mpi_read_bytes(self.file, magic_len)
        if magic_str[:-2] != MAGIC_PREFIX:
            raise MPIFileTypeError(
                "MAGIC_PREFIX missing at the beginning of file {}".format(
                    self.fn))

        # The header length is an unsigned little-endian integer: two bytes
        # in format 1.0, four bytes in format 2.0.
        version = magic_str[-2:]
        if version == b'\x01\x00':
            hlength_type = '<H'
        elif version == b'\x02\x00':
            hlength_type = '<I'
        else:
            raise MPIFileTypeError("Invalid version %r" % version)

        hlength_str = mpi_read_bytes(self.file,
                                     struct.calcsize(hlength_type))
        header_length = struct.unpack(hlength_type, hlength_str)[0]
        header = mpi_read_bytes(self.file, header_length)

        header = _filter_header(asstr(header))
        d = safe_eval(
            header
        )  # TODO: Copy from _read_array_header with all the assertions

        self.dtype = np.dtype(d['descr'])
        self.fortran_order = d['fortran_order']
        self.nb_grid_pts = d['shape']
    except Exception:
        # FIXME! This should be handled through a resource manager
        if self.file is not None:
            self.file.Close()
            self.file = None
        raise
def open_memmap(filename, mode='r+', dtype=None, shape=None, fortran_order=False, version=(1,0), offset=0): """ Open a .npy file as a memory-mapped array, with offset argument. This may be used to read an existing file or create a new one. :param str filename: The name of the file on disk. This may not be a file-like object. :param str mode: The mode to open the file with. In addition to the standard file modes, 'c' is also accepted to mean "copy on write". See `numpy.memmap` for the available mode strings. :param dtype dtype: The data type of the array if we are creating a new file in "write" mode. :param tuple shape: The shape of the array if we are creating a new file in "write" mode. Shape of (contiguous) slice if opening an existing file. :param bool fortran_order: Whether the array should be Fortran-contiguous (True) or C-contiguous (False) if we are creating a new file in "write" mode. :param tuple version: If the mode is a "write" mode, then this is the version (major, minor) of the file format used to create the file. :param int offset: Number of elements to skip along the first dimension. :return numpy.memmap: The memory-mapped array. Raises: * :exc:`ValueError` if the data or the mode is invalid * :exc:`IOError` if the file is not found or cannot be opened correctly. .. seealso:: :func:`numpy.memmap` """ if not isinstance(filename, basestring): raise ValueError("Filename must be a string. Memmap cannot use" \ " existing file handles.") if 'w' in mode: assert offset == 0, "Cannot specify offset when creating memmap" # We are creating the file, not reading it. # Check if we ought to create the file. if version != (1, 0): msg = "only support version (1,0) of file format, not %r" raise ValueError(msg % (version,)) # Ensure that the given dtype is an authentic dtype object rather than # just something that can be interpreted as a dtype object. dtype = np.dtype(dtype) if dtype.hasobject: msg = "Array can't be memory-mapped: Python objects in dtype." 
raise ValueError(msg) d = dict( descr=dtype_to_descr(dtype), fortran_order=fortran_order, shape=shape, ) # If we got here, then it should be safe to create the file. fp = open(filename, mode+'b') try: fp.write(magic(*version)) write_array_header_1_0(fp, d) offset = fp.tell() finally: fp.close() else: # Read the header of the file first. fp = open(filename, 'rb') try: version = read_magic(fp) if version != (1, 0): msg = "only support version (1,0) of file format, not %r" raise ValueError(msg % (version,)) fullshape, fortran_order, dtype = read_array_header_1_0(fp) if shape: length = np.atleast_1d(shape) msg = "Specify shape along first dimension only" assert length.ndim == 1, msg else: length = fullshape[0] - offset shape = (length,) + fullshape[1:] if dtype.hasobject: msg = "Array can't be memory-mapped: Python objects in dtype." raise ValueError(msg) offset_items = offset * np.prod(fullshape[1:], dtype=int) offset_bytes = fp.tell() + offset_items * dtype.itemsize finally: fp.close() if fortran_order: order = 'F' else: order = 'C' # We need to change a write-only mode to a read-write mode since we've # already written data to the file. if mode == 'w+': mode = 'r+' marray = np.memmap(filename, dtype=dtype, shape=shape, order=order, mode=mode, offset=offset_bytes) return marray
def _get_npy_header(h5entry):
    """Return the npy magic string plus header generated for ``h5entry``.

    The magic string for format 1.0 is written to an in-memory buffer,
    then ``hwrite`` appends whatever ``hdata`` derives from
    ``h5entry.value``; the buffer content is returned.
    NOTE(review): ``hwrite`` / ``hdata`` are presumably aliases of NumPy's
    header writer and header-data helpers -- confirm at the import site.
    """
    buf = StringIO()
    # XXX: support only 1.0 version of npy file
    npy_magic = magic(1, 0)
    buf.write(npy_magic)
    header_fields = hdata(h5entry.value)
    hwrite(buf, header_fields)
    return buf.getvalue()
def save_npy(fn, data, subdomain_locations=None, nb_grid_pts=None,
             comm=MPI.COMM_WORLD):
    """
    Write a one- or two-dimensional array that is distributed over the
    processes of `comm` into a single npy file using MPI I/O.

    Parameters
    ----------
    fn : str
        Name of the file to write.
    data : numpy array
        Data owned by the processor.
    subdomain_locations : int or tuple of ints, optional
        Index of the first element of `data` within the global data.
        Defaults to the origin.
    nb_grid_pts : int or tuple of ints, optional
        Shape of the global data. Defaults to the shape of `data`.
    comm : MPI communicator
        Communicator on which the file is opened; its rank 0 writes the
        header.
    """
    data = np.asarray(data)
    ndims = len(data.shape)
    if ndims == 1:
        # One-dimensional data is handled as a single column.
        data = data.reshape((-1, 1))

    if subdomain_locations is None:
        subdomain_locations = (0, 0)
    elif ndims == 1:
        # Accept a bare integer as well as a one-element sequence.
        subdomain_locations = (np.ravel(subdomain_locations)[0], 0)
    nb_subdomain_grid_pts = data.shape

    if nb_grid_pts is None:
        nb_grid_pts = nb_subdomain_grid_pts
    elif ndims == 1:
        nb_grid_pts = (np.ravel(nb_grid_pts)[0], 1)
    # Plain Python ints: NumPy integer scalars would show up in the header
    # text as e.g. "np.int64(4)" under NumPy >= 2, which readers reject.
    nb_grid_pts = tuple(int(n) for n in nb_grid_pts)
    subdomain_locations = tuple(int(loc) for loc in subdomain_locations)

    fortran_order = np.isfortran(data)

    from numpy.lib.format import dtype_to_descr, magic
    magic_str = magic(1, 0)

    arr_dict_str = str({
        'descr': dtype_to_descr(data.dtype),
        'fortran_order': fortran_order,
        'shape': (nb_grid_pts[0], ) if ndims == 1 else nb_grid_pts
    })

    # Pad so that magic + 2-byte length + header (including the final
    # newline) is a multiple of 16 bytes.
    while (len(arr_dict_str) + len(magic_str) + 2) % 16 != 15:
        arr_dict_str += ' '
    arr_dict_str += '\n'
    header_len = len(arr_dict_str) + len(magic_str) + 2

    fh = MPI.File.Open(comm, fn, MPI.MODE_CREATE | MPI.MODE_WRONLY)
    try:
        if comm.Get_rank() == 0:
            fh.Write(magic_str)
            # Format 1.0 stores the header length as little-endian uint16.
            fh.Write(np.array([len(arr_dict_str)], dtype='<u2'))
            fh.Write(arr_dict_str.encode('latin-1'))

        if fortran_order:
            # the returned array will be in fortran_order.
            # the data is loaded in C_contiguous array but in a transposed
            # manner data.transpose() is called which swaps the shapes back
            # again and toggles C-order to F-order
            ix = 1
            iy = 0
        else:
            ix = 0
            iy = 1

        mpitype = MPI._typedict[data.dtype.char]
        # see MPI_TYPE_VECTOR
        filetype = mpitype.Create_vector(
            # number of blocks : length of data in the non-contiguous
            # direction
            nb_subdomain_grid_pts[ix],
            # length of block : length of data in contiguous direction
            nb_subdomain_grid_pts[iy],
            # stepsize: the data is contiguous in y direction, two matrix
            # elements with same x position are separated by ny in memory
            nb_grid_pts[iy]
        )
        filetype.Commit()
        try:
            fh.Set_view(
                header_len +
                (subdomain_locations[ix] * nb_grid_pts[iy] +
                 subdomain_locations[iy]) * mpitype.Get_size(),
                filetype=filetype)
            if fortran_order:
                data = data.transpose()
            # Write_all needs a contiguous buffer; this copies only if the
            # data is not already C-contiguous.
            fh.Write_all(np.ascontiguousarray(data))
        finally:
            filetype.Free()
    finally:
        fh.Close()