Example #1
0
def load_shape(n):
    """Return the array shape recorded in the header of the ``.npy`` file *n*.

    Only the magic string and the version-1.0 header are read; the array
    data itself is never loaded.

    :param n: path of the ``.npy`` file to inspect.
    :return: the shape tuple stored in the header.
    :raises TypeError: if the stored shape does not have exactly four
        dimensions.
    """
    with open(n, 'rb') as handle:
        # The magic string must be consumed before the header can be parsed;
        # the version numbers it yields are not used here.
        _major, _minor = read_magic(handle)
        dims, _fortran, _dtype = read_array_header_1_0(handle)
    if len(dims) == 4:
        return dims
    raise TypeError('Errr! Single image... %s' % n)
Example #2
0
def read_npy(fp, prn=False):
    """Read an ``.npy`` file from disk in buffered chunks.

    : fp = file path, e.g. "c:/temp/a01.npy"
    : prn = if True, print the magic string, shape, contiguity and dtype
    :
    : Returns the stored array with its original shape.  Files written in
    : Fortran (column-major) order are returned with their elements in the
    : correct positions, as ``np.load`` would return them.
    :
    : Raises ValueError for file-format versions other than 1.0 and 2.0.
    """
    frmt = """
    Magic {}
    Shape {},  C-contig {}, dtype {}
    """
    from numpy.lib import format as format_
    with open(fp, 'rb') as f:
        version = format_.read_magic(f)
        mag = format_.magic(*version)
        # The header layout depends on the format version, so pick the
        # matching parser instead of always assuming version 1.0.
        if version == (1, 0):
            shp, is_fortran, dt = format_.read_array_header_1_0(f)
        elif version == (2, 0):
            shp, is_fortran, dt = format_.read_array_header_2_0(f)
        else:
            raise ValueError(
                "unsupported npy format version: %r" % (version,))
        count = int(np.multiply.reduce(shp, dtype=np.int64))

        BUFFER_SIZE = 2**18
        array = np.ndarray(count, dtype=dt)
        # Zero-sized items carry no data on disk; skipping the loop also
        # avoids dividing by an itemsize of zero.
        if dt.itemsize > 0:
            max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, dt.itemsize)
            for i in range(0, count, max_read_count):
                read_count = min(max_read_count, count - i)
                read_size = int(read_count * dt.itemsize)
                data = format_._read_bytes(f, read_size, "array data")
                array[i:i + read_count] = np.frombuffer(data,
                                                        dtype=dt,
                                                        count=read_count)
        if is_fortran:
            # Column-major data: reshape with the axes reversed, then
            # transpose so each element lands at its original index.
            array = array.reshape(shp[::-1]).T
        else:
            array = array.reshape(shp)
    if prn:
        print(dedent(frmt).format(mag, shp, (not is_fortran), dt))
    return array
Example #3
0
def read_npy(fp, prn=False):
    """Read an ``.npy`` file from disk in buffered chunks.

    : fp = file path, e.g. "c:/temp/a01.npy"
    : prn = if True, print the magic string, shape, contiguity and dtype
    :
    : Returns the stored array with its original shape.  Files written in
    : Fortran (column-major) order are returned with their elements in the
    : correct positions, as ``np.load`` would return them.
    :
    : Raises ValueError for file-format versions other than 1.0 and 2.0.
    """
    frmt = """
    Magic {}
    Shape {},  C-contig {}, dtype {}
    """
    from numpy.lib import format as format_
    with open(fp, 'rb') as f:
        version = format_.read_magic(f)
        mag = format_.magic(*version)
        # The header layout depends on the format version, so pick the
        # matching parser instead of always assuming version 1.0.
        if version == (1, 0):
            shp, is_fortran, dt = format_.read_array_header_1_0(f)
        elif version == (2, 0):
            shp, is_fortran, dt = format_.read_array_header_2_0(f)
        else:
            raise ValueError(
                "unsupported npy format version: %r" % (version,))
        count = int(np.multiply.reduce(shp, dtype=np.int64))

        BUFFER_SIZE = 2**18
        array = np.ndarray(count, dtype=dt)
        # Zero-sized items carry no data on disk; skipping the loop also
        # avoids dividing by an itemsize of zero.
        if dt.itemsize > 0:
            max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, dt.itemsize)
            for i in range(0, count, max_read_count):
                read_count = min(max_read_count, count - i)
                read_size = int(read_count * dt.itemsize)
                data = format_._read_bytes(f, read_size, "array data")
                array[i:i+read_count] = np.frombuffer(data, dtype=dt,
                                                      count=read_count)
        if is_fortran:
            # Column-major data: reshape with the axes reversed, then
            # transpose so each element lands at its original index.
            array = array.reshape(shp[::-1]).T
        else:
            array = array.reshape(shp)
    if prn:
        print(dedent(frmt).format(mag, shp, (not is_fortran), dt))
    return array
Example #4
0
def test_read_array_header_1_0():
    """Round-trip check: a version-1.0 header parses back to what was written."""
    stream = BytesIO()
    format.write_array(stream, np.ones((3, 6), dtype=float), version=(1, 0))

    # Position the stream just past the magic string, where the header starts.
    stream.seek(format.MAGIC_LEN)
    header = format.read_array_header_1_0(stream)

    expected = ((3, 6), False, float)
    assert_(header == expected)
Example #5
0
def read_header_data(fname):
    """Read the header of a version-1.0 ``.npy`` file.

    :param fname: path of the ``.npy`` file.
    :return: ``(shape, fortran_order, dtype, header_length)`` where
        ``header_length`` is the file position just after the header, as
        reported by ``tell()`` once the header has been parsed.
    :raises ValueError: if the file's format version is not (1, 0).
    """
    # The magic string and header are raw bytes, so the file is opened in
    # binary mode; the ``with`` block closes the handle on every exit path.
    with open(fname, 'rb') as fp:
        version = npfor.read_magic(fp)
        if version != (1, 0):
            msg = "only support version (1,0) of file format, not %r"
            raise ValueError(msg % (version,))
        shape, fortran_order, dtype = npfor.read_array_header_1_0(fp)
        header_length = fp.tell()
    return shape, fortran_order, dtype, header_length
Example #6
0
def read_header_data(fname):
    """Read the header of a version-1.0 ``.npy`` file.

    :param fname: path of the ``.npy`` file.
    :return: ``(shape, fortran_order, dtype, header_length)`` where
        ``header_length`` is the file position just after the header, as
        reported by ``tell()`` once the header has been parsed.
    :raises ValueError: if the file's format version is not (1, 0).
    """
    # The magic string and header are raw bytes, so the file is opened in
    # binary mode; the ``with`` block closes the handle on every exit path.
    with open(fname, 'rb') as fp:
        version = npfor.read_magic(fp)
        if version != (1, 0):
            msg = "only support version (1,0) of file format, not %r"
            raise ValueError(msg % (version, ))
        shape, fortran_order, dtype = npfor.read_array_header_1_0(fp)
        header_length = fp.tell()
    return shape, fortran_order, dtype, header_length
Example #7
0
 def __init__(self, filename):
     """Build a memory-mapped view of every array in an ``.npz`` archive.

     The archive is opened with ``np.load`` and, for each name in
     ``npz.files``, an ``np.memmap`` is stored in ``self._data`` under
     that name.

     :param filename: path of the ``.npz`` archive; kept on
         ``self._filename``.
     """
     self._filename = filename
     self._data = {}
     npz = np.load(filename)
     # File object exposed by the archive's ``zip`` attribute; all header
     # reads and the memmaps below go through this one object.
     file = npz.zip.fp
     for key in npz.files:
         # NOTE: rebinds the ``filename`` parameter to the member name;
         # ``self._filename`` was already saved above.
         filename = '{}.npy'.format(key)
         # The returned member handle is discarded. Presumably the call is
         # made for its side effect of positioning ``file`` at the start of
         # this member's data -- TODO confirm; this also looks like it
         # assumes members are stored uncompressed.
         npz.zip.open(filename)
         version = nlf.read_magic(file)
         # Version (1, 0) uses the 1.0 header parser; every other version
         # is handed to the 2.0 parser.
         shape, fortran_order, dtype = nlf.read_array_header_1_0(file) if version == (1, 0) \
             else nlf.read_array_header_2_0(file)
         # ``file.tell()`` is the position right after the header, which is
         # used as the byte offset of the array data for the memmap.
         self._data[key] = np.memmap(file,
                                     dtype=dtype,
                                     mode='r',
                                     shape=shape,
                                     order='F' if fortran_order else 'C',
                                     offset=file.tell())
Example #8
0
def read_npy(fp, prn=False):
    """ Read an npy file quickly

    fp : string
        The file path: "c:/temp/a01.npy"
    prn : boolean
        obtain full information if True

    Returns:
    --------
    The stored array with its original shape.  Files written in Fortran
    (column-major) order are returned with their elements in the correct
    positions, as ``np.load`` would return them.

    Raises:
    -------
    ValueError for file-format versions other than 1.0 and 2.0.

    Requires:
    ---------
    from numpy.lib import format

    Notes:
    -------
    shortcut ... np.load("c:/temp/a01.npy")
    """
    frmt = """
    ---- npy reader ---------------------------------------------------------
    File  {}
    Shape {},  C-contig {},  dtype {}
    Magic {}
    -------------------------------------------------------------------------
    """
    with open(fp, 'rb') as f:
        version = format.read_magic(f)
        mag = format.magic(*version)
        # The header layout depends on the format version, so pick the
        # matching parser instead of always assuming version 1.0.
        if version == (1, 0):
            shp, is_fortran, dt = format.read_array_header_1_0(f)
        elif version == (2, 0):
            shp, is_fortran, dt = format.read_array_header_2_0(f)
        else:
            raise ValueError(
                "unsupported npy format version: %r" % (version,))
        count = int(np.multiply.reduce(shp, dtype=np.int64))
        BUFFER_SIZE = 2**18
        array = np.ndarray(count, dtype=dt)
        # Zero-sized items carry no data on disk; skipping the loop also
        # avoids dividing by an itemsize of zero.
        if dt.itemsize > 0:
            max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, dt.itemsize)
            for i in range(0, count, max_read_count):
                cnt = min(max_read_count, count - i)
                read_size = int(cnt * dt.itemsize)
                data = format._read_bytes(f, read_size, "array data")
                array[i:i + cnt] = np.frombuffer(data, dtype=dt, count=cnt)
        if is_fortran:
            # Column-major data: reshape with the axes reversed, then
            # transpose so each element lands at its original index.
            array = array.reshape(shp[::-1]).T
        else:
            array = array.reshape(shp)
    if prn:
        print(dedent(frmt).format(fp, shp, (not is_fortran), dt, mag))
    return array
Example #9
0
def load(file_name, axis, mpi_comm=MPI.COMM_WORLD):
    """
    Load a numpy array across parallel jobs in the MPI communicator.
    The array is sliced along the chosen dimension, with minimal bandwidth.

    Parameters
    ----------
    file_name : str
        The numpy array file to load.
    axis : int
        The axis on which to distribute the array.
    mpi_comm : mpi4py.MPI.Comm, optional
        The MPI communicator used to distribute, by default MPI.COMM_WORLD.

    Returns
    -------
    (numpy.ndarray, tuple(int))
        The distributed array, and the size of the full array.

    Raises
    ------
    ValueError
        If the numpy version used to save the file is not supported.
    NotImplementedError
        If the array is saved in Fortran order.
    """

    # Only the root process parses the header; the result is broadcast so
    # every rank works from the same shape, dtype and data offset.
    header = None
    if is_root_process(mpi_comm):
        with open(file_name, 'rb') as fp:
            version, _ = npformat.read_magic(fp)

            if version == 1:
                header = npformat.read_array_header_1_0(fp)
            elif version == 2:
                header = npformat.read_array_header_2_0(fp)
            else:
                raise ValueError(
                    "Invalid numpy format version: {}".format(version))

            # Append the file position just past the header, i.e. the byte
            # offset at which the array data starts.
            header = *header, fp.tell()

    header = mpi_comm.bcast(header, root=0)
    full_shape, fortran, dtype, header_offset = header

    if fortran:
        raise NotImplementedError(
            "Fortran-ordered (column-major) arrays are not supported")

    ndims = len(full_shape)
    axis = utils.positive_index(axis, ndims)

    i_start, bin_size = distribute_mpi(full_shape[axis], mpi_comm)

    # Local array: same shape as the full array except along ``axis``.
    l_shape = list(full_shape)
    l_shape[axis] = bin_size

    l_array = np.empty(l_shape, dtype=dtype)

    slice_type = create_slice_view(axis,
                                   bin_size,
                                   shape=full_shape,
                                   dtype=dtype)
    slice_type.Commit()

    single_slice_extent = slice_type.extent
    if bin_size != 0:
        # Floor division keeps the extent, and therefore the displacement
        # below, an integer; true division would turn the byte offset into
        # a float.
        single_slice_extent //= bin_size

    displacement = header_offset + i_start * single_slice_extent
    base_type = to_mpi_datatype(l_array.dtype)

    fh = MPI.File.Open(mpi_comm, file_name, MPI.MODE_RDONLY)
    fh.Set_view(displacement, filetype=slice_type)

    fh.Read_all([l_array, l_array.size, base_type])
    fh.Close()
    slice_type.Free()

    return l_array, full_shape
Example #10
0
def open_memmap(filename, mode='r+', dtype=None, shape=None,
                fortran_order=False, version=(1,0), offset=0):
    """
    Open a .npy file as a memory-mapped array, with offset argument.

    This may be used to read an existing file or create a new one.
    
    :param str filename: The name of the file on disk. This may not be a 
        file-like object.
    :param str mode: The mode to open the file with. In addition to the 
        standard file modes, 'c' is also accepted to mean "copy on write". 
        See `numpy.memmap` for the available mode strings.
    :param dtype dtype: The data type of the array if we are creating a 
        new file in "write" mode.
    :param tuple shape: The shape of the array if we are creating a new 
        file in "write" mode. When opening an existing file, the number of 
        entries to map along the first dimension (a single integer or a 
        one-element sequence); the remaining dimensions are taken from the 
        file. If omitted, everything after ``offset`` is mapped.
    :param bool fortran_order: Whether the array should be Fortran-contiguous 
        (True) or C-contiguous (False) if we are creating a new file in 
        "write" mode.
    :param tuple version: If the mode is a "write" mode, then this is the 
        version (major, minor) of the file format used to create the file.
    :param int offset: Number of elements to skip along the first dimension.
        Must be 0 when creating a file.
    :return numpy.memmap: The memory-mapped array.

    Raises:
    
    * :exc:`ValueError` if the data or the mode is invalid, if ``offset`` is 
      given when creating a file, or if ``shape`` names more than the first 
      dimension when opening an existing file.
    * :exc:`IOError` if the file is not found or cannot be opened correctly.
    
    .. seealso:: :func:`numpy.memmap`
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str
    if not isinstance(filename, string_types):
        raise ValueError("Filename must be a string.  Memmap cannot use" \
                         " existing file handles.")

    if 'w' in mode:
        # We are creating the file, not reading it.
        if offset != 0:
            raise ValueError("Cannot specify offset when creating memmap")
        # Check if we ought to create the file.
        if version != (1, 0):
            msg = "only support version (1,0) of file format, not %r"
            raise ValueError(msg % (version,))
        # Ensure that the given dtype is an authentic dtype object rather than
        # just something that can be interpreted as a dtype object.
        dtype = np.dtype(dtype)
        if dtype.hasobject:
            msg = "Array can't be memory-mapped: Python objects in dtype."
            raise ValueError(msg)
        d = dict(
            descr=dtype_to_descr(dtype),
            fortran_order=fortran_order,
            shape=shape,
        )
        # Depending on the NumPy release, write_array_header_1_0 either emits
        # only the header (the caller must write the magic string first) or
        # emits the magic string itself.  Build the header in memory and
        # prepend the magic string only when it is missing, so the file never
        # ends up with it twice.
        from io import BytesIO
        header_buf = BytesIO()
        write_array_header_1_0(header_buf, d)
        header_bytes = header_buf.getvalue()
        magic_bytes = magic(*version)
        if not header_bytes.startswith(magic_bytes):
            header_bytes = magic_bytes + header_bytes
        # If we got here, then it should be safe to create the file.
        fp = open(filename, mode+'b')
        try:
            fp.write(header_bytes)
            # The array data starts right after the header.
            offset_bytes = fp.tell()
        finally:
            fp.close()
    else:
        # Read the header of the file first.
        fp = open(filename, 'rb')
        try:
            version = read_magic(fp)
            if version != (1, 0):
                msg = "only support version (1,0) of file format, not %r"
                raise ValueError(msg % (version,))
            fullshape, fortran_order, dtype = read_array_header_1_0(fp)
            
            if shape:
                length = np.atleast_1d(shape)
                if length.ndim != 1 or length.size != 1:
                    raise ValueError("Specify shape along first dimension only")
                # Use a plain int so the shape tuple holds no arrays.
                length = int(length[0])
            else:
                length = fullshape[0] - offset
            shape = (length,) + fullshape[1:]
            
            if dtype.hasobject:
                msg = "Array can't be memory-mapped: Python objects in dtype."
                raise ValueError(msg)
            
            # NOTE(review): skipping whole entries along the first dimension
            # is only a contiguous byte range for C-ordered files; Fortran
            # ordered files combined with offset/shape look unsupported --
            # confirm with callers.
            offset_items = offset * np.prod(fullshape[1:], dtype=int)
            offset_bytes = int(fp.tell() + offset_items * dtype.itemsize)
        finally:
            fp.close()
    
    if fortran_order:
        order = 'F'
    else:
        order = 'C'

    # We need to change a write-only mode to a read-write mode since we've
    # already written data to the file.
    if mode == 'w+':
        mode = 'r+'

    marray = np.memmap(filename, dtype=dtype, shape=shape, order=order,
        mode=mode, offset=offset_bytes)

    return marray