def load_shape(n):
    """Return the 4-D shape recorded in the header of the ``.npy`` file *n*.

    Only the magic string and the version 1.0 header are read; the array
    data itself is never loaded.

    Parameters
    ----------
    n : str
        Path of the ``.npy`` file to inspect.

    Returns
    -------
    tuple
        The stored array shape (always of length 4).

    Raises
    ------
    TypeError
        If the stored array does not have exactly four dimensions.
    """
    with open(n, 'rb') as npy_file:
        # The magic/version prefix has to be consumed before the header.
        _major, _minor = read_magic(npy_file)
        dims, _is_fortran, _dtype = read_array_header_1_0(npy_file)
    if len(dims) == 4:
        return dims
    raise TypeError('Errr! Single image... %s' % n)
def read_npy(fp, prn=False):
    """Read an ``.npy`` file in buffered chunks and return its array.

    Parameters
    ----------
    fp : str
        The file path, e.g. "c:/temp/a01.npy".
    prn : bool
        If True, print the magic string, shape, C-contiguity and dtype.

    Returns
    -------
    numpy.ndarray
        The stored array with its stored shape.  Data saved in Fortran
        (column-major) order is returned as a Fortran-contiguous array
        holding the same values as the array that was saved.

    Raises
    ------
    ValueError
        If the file format version is neither 1.x nor 2.x, or if the file
        ends before all array data has been read.
    """
    frmt = """
    Magic {}
    Shape {}, C-contig {}, dtype {}
    """
    from numpy.lib import format as format_

    with open(fp, 'rb') as f:
        major, minor = format_.read_magic(f)
        mag = format_.magic(major, minor)
        # The header-length field is 2 bytes in format 1.x and 4 bytes in
        # 2.x, so the reader has to match the version stored in the file.
        if major == 1:
            shp, is_fortran, dt = format_.read_array_header_1_0(f)
        elif major == 2:
            shp, is_fortran, dt = format_.read_array_header_2_0(f)
        else:
            raise ValueError(
                "unsupported .npy format version {}.{}".format(major, minor))
        count = np.multiply.reduce(shp, dtype=np.int64)
        BUFFER_SIZE = 2**18
        max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, dt.itemsize)
        array = np.ndarray(count, dtype=dt)
        for i in range(0, count, max_read_count):
            read_count = min(max_read_count, count - i)
            read_size = int(read_count * dt.itemsize)
            data = f.read(read_size)
            # A short read from a regular file means the data is truncated.
            if len(data) != read_size:
                raise ValueError(
                    "EOF: reading array data, expected {} bytes got {}"
                    .format(read_size, len(data)))
            array[i:i + read_count] = np.frombuffer(data, dtype=dt,
                                                    count=read_count)
    if is_fortran:
        # Column-major data: give the flat buffer the reversed shape, then
        # transpose so the elements land where they were saved.
        array.shape = shp[::-1]
        array = array.transpose()
    else:
        array.shape = shp
    if prn:
        print(dedent(frmt).format(mag, shp, (not is_fortran), dt))
    return array
def read_npy(fp, prn=False):
    """Read an ``.npy`` file in buffered chunks and return its array.

    Parameters
    ----------
    fp : str
        The file path, e.g. "c:/temp/a01.npy".
    prn : bool
        If True, print the magic string, shape, C-contiguity and dtype.

    Returns
    -------
    numpy.ndarray
        The stored array with its stored shape.  Data saved in Fortran
        (column-major) order is returned as a Fortran-contiguous array
        holding the same values as the array that was saved.

    Raises
    ------
    ValueError
        If the file format version is neither 1.x nor 2.x, or if the file
        ends before all array data has been read.
    """
    frmt = """
    Magic {}
    Shape {}, C-contig {}, dtype {}
    """
    from numpy.lib import format as format_

    with open(fp, 'rb') as f:
        major, minor = format_.read_magic(f)
        mag = format_.magic(major, minor)
        # Format 1.x stores a 2-byte header length, 2.x a 4-byte one; using
        # the wrong reader misparses the header.
        if major == 1:
            shp, is_fortran, dt = format_.read_array_header_1_0(f)
        elif major == 2:
            shp, is_fortran, dt = format_.read_array_header_2_0(f)
        else:
            raise ValueError(
                "unsupported .npy format version {}.{}".format(major, minor))
        count = np.multiply.reduce(shp, dtype=np.int64)
        BUFFER_SIZE = 2**18
        max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, dt.itemsize)
        array = np.ndarray(count, dtype=dt)
        for i in range(0, count, max_read_count):
            read_count = min(max_read_count, count - i)
            read_size = int(read_count * dt.itemsize)
            data = f.read(read_size)
            # Fewer bytes than requested means the file is truncated.
            if len(data) != read_size:
                raise ValueError(
                    "EOF: reading array data, expected {} bytes got {}"
                    .format(read_size, len(data)))
            array[i:i+read_count] = np.frombuffer(data, dtype=dt,
                                                  count=read_count)
    if is_fortran:
        # The bytes are laid out column-major: reshape with the axes
        # reversed and transpose to recover the saved element positions.
        array.shape = shp[::-1]
        array = array.transpose()
    else:
        array.shape = shp
    if prn:
        print(dedent(frmt).format(mag, shp, (not is_fortran), dt))
    return array
def test_read_array_header_1_0():
    """Check that a version 1.0 header yields the written shape, order, dtype."""
    stream = BytesIO()
    format.write_array(stream, np.ones((3, 6), dtype=float), version=(1, 0))

    # Skip the magic string so the stream is positioned at the header.
    stream.seek(format.MAGIC_LEN)
    got_shape, got_fortran, got_dtype = format.read_array_header_1_0(stream)

    expected = ((3, 6), False, float)
    assert_((got_shape, got_fortran, got_dtype) == expected)
def read_header_data(fname):
    """Read the header of a version 1.0 ``.npy`` file.

    Parameters
    ----------
    fname : str
        Path of the ``.npy`` file.

    Returns
    -------
    tuple
        ``(shape, fortran_order, dtype, header_length)`` where
        ``header_length`` is the file position right after the header,
        i.e. the byte offset at which the array data starts.

    Raises
    ------
    ValueError
        If the file is not in format version (1, 0).
    """
    # The file is binary: text mode would try to decode the magic string.
    # The context manager also closes the handle on every exit path.
    with open(fname, 'rb') as fp:
        version = npfor.read_magic(fp)
        if version != (1, 0):
            msg = "only support version (1,0) of file format, not %r"
            raise ValueError(msg % (version,))
        shape, fortran_order, dtype = npfor.read_array_header_1_0(fp)
        header_length = fp.tell()
    return shape, fortran_order, dtype, header_length
def read_header_data(fname):
    """Read the header of a version 1.0 ``.npy`` file.

    Parameters
    ----------
    fname : str
        Path of the ``.npy`` file.

    Returns
    -------
    tuple
        ``(shape, fortran_order, dtype, header_length)`` where
        ``header_length`` is the file position right after the header,
        i.e. the byte offset at which the array data starts.

    Raises
    ------
    ValueError
        If the file is not in format version (1, 0).
    """
    # Open in binary mode (the header starts with non-text bytes) and let
    # the ``with`` block close the file even when validation fails.
    with open(fname, 'rb') as fp:
        version = npfor.read_magic(fp)
        if version != (1, 0):
            msg = "only support version (1,0) of file format, not %r"
            raise ValueError(msg % (version, ))
        shape, fortran_order, dtype = npfor.read_array_header_1_0(fp)
        header_length = fp.tell()
    return shape, fortran_order, dtype, header_length
def __init__(self, filename):
    """Open *filename* with ``np.load`` and memory-map each of its entries.

    For every name in ``npz.files`` a read-only ``np.memmap`` is created
    over the archive's underlying file object and stored in ``self._data``
    under that name.  The path passed in is kept in ``self._filename``.
    """
    self._filename = filename
    self._data = {}
    npz = np.load(filename)
    # Underlying file object of the archive; all header reads and the
    # memmaps below go through this one handle.
    file = npz.zip.fp
    for key in npz.files:
        # Rebinds the local ``filename`` to the member name inside the
        # archive (``self._filename`` still holds the original path).
        filename = '{}.npy'.format(key)
        # The returned member handle is discarded.
        # NOTE(review): presumably called only for its side effect of
        # leaving ``file`` positioned at the start of this member's data -
        # confirm against the zipfile implementation in use.
        npz.zip.open(filename)
        version = nlf.read_magic(file)
        # Pick the header reader matching the stored format version.
        shape, fortran_order, dtype = nlf.read_array_header_1_0(file) if version == (1, 0) \
            else nlf.read_array_header_2_0(file)
        # ``file.tell()`` is the position right after the header, used as
        # the byte offset of the array data for the memmap.
        # NOTE(review): mapping the raw archive bytes presumably requires
        # the members to be stored uncompressed - confirm.
        self._data[key] = np.memmap(file, dtype=dtype, mode='r', shape=shape,
                                    order='F' if fortran_order else 'C',
                                    offset=file.tell())
def read_npy(fp, prn=False):
    """Read an npy file quickly

    fp : string
        The file path: "c:/temp/a01.npy"
    prn : boolean
        obtain full information if True

    Returns:
    --------
    The stored array with its stored shape.  Data saved in Fortran
    (column-major) order is returned as a Fortran-contiguous array holding
    the same values as the array that was saved.

    Raises:
    -------
    ValueError if the file format version is neither 1.x nor 2.x, or if
    the file ends before all array data has been read.

    Requires:
    ---------
    from numpy.lib import format

    Notes:
    -------
    shortcut ... np.load("c:/temp/a01.npy")
    """
    frmt = """
    ---- npy reader ---------------------------------------------------------
    File {}
    Shape {}, C-contig {}, dtype {}
    Magic {}
    -------------------------------------------------------------------------
    """
    with open(fp, 'rb') as f:
        major, minor = format.read_magic(f)
        mag = format.magic(major, minor)
        # The header-length field differs between format 1.x (2 bytes) and
        # 2.x (4 bytes), so dispatch on the stored major version.
        if major == 1:
            shp, is_fortran, dt = format.read_array_header_1_0(f)
        elif major == 2:
            shp, is_fortran, dt = format.read_array_header_2_0(f)
        else:
            raise ValueError(
                "unsupported .npy format version {}.{}".format(major, minor))
        count = np.multiply.reduce(shp, dtype=np.int64)
        BUFFER_SIZE = 2**18
        max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, dt.itemsize)
        array = np.ndarray(count, dtype=dt)
        for i in range(0, count, max_read_count):
            cnt = min(max_read_count, count - i)
            read_size = int(cnt * dt.itemsize)
            data = f.read(read_size)
            # A short read from a regular file means the data is truncated.
            if len(data) != read_size:
                raise ValueError(
                    "EOF: reading array data, expected {} bytes got {}"
                    .format(read_size, len(data)))
            array[i:i + cnt] = np.frombuffer(data, dtype=dt, count=cnt)
    if is_fortran:
        # Column-major bytes: reshape with reversed axes, then transpose so
        # every element ends up at the index it was saved under.
        array.shape = shp[::-1]
        array = array.transpose()
    else:
        array.shape = shp
    if prn:
        print(dedent(frmt).format(fp, shp, (not is_fortran), dt, mag))
    return array
def load(file_name, axis, mpi_comm=MPI.COMM_WORLD):
    """
    Load a numpy array across parallel jobs in the MPI communicator.
    The array is sliced along the chosen dimension, with minimal bandwidth.

    Parameters
    ----------
    file_name : str
        The numpy array file to load.
    axis : int
        The axis on which to distribute the array.
    mpi_comm : mpi4py.MPI.Comm, optional
        The MPI communicator used to distribute, by default MPI.COMM_WORLD.

    Returns
    -------
    (numpy.ndarray, tuple(int))
        The distributed array, and the size of the full array.

    Raises
    ------
    ValueError
        If the numpy version used to save the file is not supported.
    NotImplementedError
        If the array is saved in Fortran order.
    """
    # Only the root process parses the header; the parsed values plus the
    # byte offset of the array data are then broadcast to every process.
    header = None
    if is_root_process(mpi_comm):
        with open(file_name, 'rb') as fp:
            version, _ = npformat.read_magic(fp)
            if version == 1:
                header = npformat.read_array_header_1_0(fp)
            elif version == 2:
                header = npformat.read_array_header_2_0(fp)
            else:
                raise ValueError(
                    "Invalid numpy format version: {}".format(version))
            header = *header, fp.tell()
    header = mpi_comm.bcast(header, root=0)
    full_shape, fortran, dtype, header_offset = header
    if fortran:
        raise NotImplementedError(
            "Fortran-ordered (column-major) arrays are not supported")

    ndims = len(full_shape)
    axis = utils.positive_index(axis, ndims)
    i_start, bin_size = distribute_mpi(full_shape[axis], mpi_comm)

    # Local buffer: same shape as the full array except along ``axis``.
    l_shape = list(full_shape)
    l_shape[axis] = bin_size
    l_array = np.empty(l_shape, dtype=dtype)

    slice_type = create_slice_view(axis, bin_size,
                                   shape=full_shape, dtype=dtype)
    slice_type.Commit()
    try:
        # Byte offset of this process' first slice.  Integer arithmetic
        # keeps the displacement an int: true division would turn it into
        # a float, which is not a valid file offset.
        slice_offset = i_start * slice_type.extent
        if bin_size != 0:
            slice_offset //= bin_size
        displacement = header_offset + slice_offset

        base_type = to_mpi_datatype(l_array.dtype)
        fh = MPI.File.Open(mpi_comm, file_name, MPI.MODE_RDONLY)
        try:
            fh.Set_view(displacement, filetype=slice_type)
            fh.Read_all([l_array, l_array.size, base_type])
        finally:
            # Close the file even if setting the view or reading fails.
            fh.Close()
    finally:
        # Release the committed datatype on every exit path.
        slice_type.Free()
    return l_array, full_shape
def open_memmap(filename, mode='r+', dtype=None, shape=None,
                fortran_order=False, version=(1,0), offset=0):
    """
    Open a .npy file as a memory-mapped array, with offset argument.

    This may be used to read an existing file or create a new one.

    :param str filename: The name of the file on disk. This may not be a
        file-like object.
    :param str mode: The mode to open the file with. In addition to the
        standard file modes, 'c' is also accepted to mean "copy on write".
        See `numpy.memmap` for the available mode strings.
    :param dtype dtype: The data type of the array if we are creating a
        new file in "write" mode.
    :param tuple shape: The shape of the array if we are creating a new
        file in "write" mode. Shape of (contiguous) slice if opening an
        existing file; in that case only the length along the first
        dimension may be given (an int or a one-element sequence).
    :param bool fortran_order: Whether the array should be
        Fortran-contiguous (True) or C-contiguous (False) if we are
        creating a new file in "write" mode.
    :param tuple version: If the mode is a "write" mode, then this is the
        version (major, minor) of the file format used to create the file.
    :param int offset: Number of elements to skip along the first
        dimension.
    :return numpy.memmap: The memory-mapped array.

    Raises:

    * :exc:`ValueError` if the data or the mode is invalid
    * :exc:`IOError` if the file is not found or cannot be opened correctly.
    * :exc:`AssertionError` if an offset is given while creating a file, or
      if `shape` describes more than the first dimension when reading.

    .. seealso:: :func:`numpy.memmap`
    """
    import io

    # ``basestring`` only exists on Python 2; fall back to ``str`` otherwise.
    try:
        string_types = basestring
    except NameError:
        string_types = str
    if not isinstance(filename, string_types):
        raise ValueError("Filename must be a string. Memmap cannot use" \
                         " existing file handles.")

    if 'w' in mode:
        assert offset == 0, "Cannot specify offset when creating memmap"
        # We are creating the file, not reading it.
        # Check if we ought to create the file.
        if version != (1, 0):
            msg = "only support version (1,0) of file format, not %r"
            raise ValueError(msg % (version,))
        # Ensure that the given dtype is an authentic dtype object rather
        # than just something that can be interpreted as a dtype object.
        dtype = np.dtype(dtype)
        if dtype.hasobject:
            msg = "Array can't be memory-mapped: Python objects in dtype."
            raise ValueError(msg)
        d = dict(
            descr=dtype_to_descr(dtype),
            fortran_order=fortran_order,
            shape=shape,
        )
        # Depending on the implementation in scope, write_array_header_1_0
        # either emits the magic string itself or leaves that to the
        # caller.  Render the header in memory first so the magic string
        # ends up in the file exactly once.
        header_buf = io.BytesIO()
        write_array_header_1_0(header_buf, d)
        header = header_buf.getvalue()
        prefix = magic(*version)
        if not header.startswith(prefix):
            header = prefix + header
        # If we got here, then it should be safe to create the file.
        with open(filename, mode + 'b') as fp:
            fp.write(header)
            # The array data starts right after the header.
            offset_bytes = fp.tell()
    else:
        # Read the header of the file first.
        with open(filename, 'rb') as fp:
            version = read_magic(fp)
            if version != (1, 0):
                msg = "only support version (1,0) of file format, not %r"
                raise ValueError(msg % (version,))
            fullshape, fortran_order, dtype = read_array_header_1_0(fp)
            header_bytes = fp.tell()

        if shape:
            length = np.atleast_1d(shape)
            msg = "Specify shape along first dimension only"
            assert length.ndim == 1 and length.size == 1, msg
            # Use a plain int so the final shape tuple holds no arrays.
            length = int(length[0])
        else:
            length = fullshape[0] - offset
        shape = (length,) + fullshape[1:]

        if dtype.hasobject:
            msg = "Array can't be memory-mapped: Python objects in dtype."
            raise ValueError(msg)

        # Skip ``offset`` entries along the first dimension, i.e. that many
        # blocks of prod(fullshape[1:]) items each.
        offset_items = offset * np.prod(fullshape[1:], dtype=int)
        offset_bytes = header_bytes + int(offset_items) * dtype.itemsize

    if fortran_order:
        order = 'F'
    else:
        order = 'C'

    # We need to change a write-only mode to a read-write mode since we've
    # already written data to the file.
    if mode == 'w+':
        mode = 'r+'

    marray = np.memmap(filename, dtype=dtype, shape=shape, order=order,
                       mode=mode, offset=offset_bytes)

    return marray