Beispiel #1
0
def write_header_data(fname, header_data):
    """Overwrite the beginning of an existing file with an npy 1.0 header.

    Parameters
    ----------
    fname : str
        Path of the file to update.  It is opened for in-place update
        (``'r+b'``), so it must already exist.
    header_data : dict
        Header description passed on to ``write_array_header_1_0``.
    """
    version = (1, 0)
    # Binary mode: npfor is presumably numpy.lib.format, whose magic() returns
    # bytes, and text mode would also translate newlines on some platforms.
    # The context manager guarantees the handle is closed, even on error.
    with open(fname, 'r+b') as fp:
        fp.write(npfor.magic(*version))
        # Custom version of write_array_header, not the np.lib.format version.
        write_array_header_1_0(fp, header_data)
Beispiel #2
0
def read_npy(fp, prn=False):
    """Read an array from an npy file.

    Parameters
    ----------
    fp : str
        Path of the file, e.g. "c:/temp/a01.npy".
    prn : bool
        If True, print the magic string, shape, contiguity and dtype.

    Returns
    -------
    numpy.ndarray
        The stored array with its stored shape.  Files written in Fortran
        order are returned as a Fortran-ordered array.

    Raises
    ------
    ValueError
        If the format version is neither 1.0 nor 2.0, or if the file ends
        before all of the array data has been read.
    """
    frmt = """
    Magic {}
    Shape {},  C-contig {}, dtype {}
    """
    from numpy.lib import format as format_
    with open(fp, 'rb') as f:
        major, minor = format_.read_magic(f)
        mag = format_.magic(major, minor)
        # Versions 1.0 and 2.0 store the header length with a different
        # width, so each one needs its own header reader.
        if (major, minor) == (1, 0):
            shp, is_fortran, dt = format_.read_array_header_1_0(f)
        elif (major, minor) == (2, 0):
            shp, is_fortran, dt = format_.read_array_header_2_0(f)
        else:
            raise ValueError(
                "unsupported npy format version {}.{}".format(major, minor))
        count = int(np.multiply.reduce(shp, dtype=np.int64))

        BUFFER_SIZE = 2**18
        flat = np.ndarray(count, dtype=dt)
        # Zero-sized items carry no bytes; skipping the loop also avoids a
        # division by zero when computing the chunk size.
        if dt.itemsize > 0:
            max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, dt.itemsize)
            for i in range(0, count, max_read_count):
                read_count = min(max_read_count, count - i)
                read_size = int(read_count * dt.itemsize)
                data = f.read(read_size)
                if len(data) != read_size:
                    raise ValueError(
                        "EOF: reading array data, expected {} bytes got {}"
                        .format(read_size, len(data)))
                flat[i:i + read_count] = np.frombuffer(data,
                                                       dtype=dt,
                                                       count=read_count)
        if is_fortran:
            # The bytes on disk are column-major: view them with the reversed
            # shape and transpose to recover the stored shape.
            array = flat.reshape(shp[::-1]).transpose()
        else:
            array = flat.reshape(shp)
    if prn:
        print(dedent(frmt).format(mag, shp, (not is_fortran), dt))
    return array
Beispiel #3
0
def write_header_data(fname, header_data):
    """Overwrite the beginning of an existing file with an npy 1.0 header.

    Parameters
    ----------
    fname : str
        Path of the file to update.  It is opened for in-place update
        (``'r+b'``), so it must already exist.
    header_data : dict
        Header description passed on to ``write_array_header_1_0``.
    """
    version = (1, 0)
    # Binary mode: npfor is presumably numpy.lib.format, whose magic() returns
    # bytes, and text mode would also translate newlines on some platforms.
    # The context manager guarantees the handle is closed, even on error.
    with open(fname, 'r+b') as fp:
        fp.write(npfor.magic(*version))
        # Custom version of write_array_header, not the np.lib.format version.
        write_array_header_1_0(fp, header_data)
Beispiel #4
0
def read_npy(fp, prn=False):
    """Read an array from an npy file.

    Parameters
    ----------
    fp : str
        Path of the file, e.g. "c:/temp/a01.npy".
    prn : bool
        If True, print the magic string, shape, contiguity and dtype.

    Returns
    -------
    numpy.ndarray
        The stored array with its stored shape.  Files written in Fortran
        order are returned as a Fortran-ordered array.

    Raises
    ------
    ValueError
        If the format version is neither 1.0 nor 2.0, or if the file ends
        before all of the array data has been read.
    """
    frmt = """
    Magic {}
    Shape {},  C-contig {}, dtype {}
    """
    from numpy.lib import format as format_
    with open(fp, 'rb') as f:
        major, minor = format_.read_magic(f)
        mag = format_.magic(major, minor)
        # Versions 1.0 and 2.0 store the header length with a different
        # width, so each one needs its own header reader.
        if (major, minor) == (1, 0):
            shp, is_fortran, dt = format_.read_array_header_1_0(f)
        elif (major, minor) == (2, 0):
            shp, is_fortran, dt = format_.read_array_header_2_0(f)
        else:
            raise ValueError(
                "unsupported npy format version {}.{}".format(major, minor))
        count = int(np.multiply.reduce(shp, dtype=np.int64))

        BUFFER_SIZE = 2**18
        flat = np.ndarray(count, dtype=dt)
        # Zero-sized items carry no bytes; skipping the loop also avoids a
        # division by zero when computing the chunk size.
        if dt.itemsize > 0:
            max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, dt.itemsize)
            for i in range(0, count, max_read_count):
                read_count = min(max_read_count, count - i)
                read_size = int(read_count * dt.itemsize)
                data = f.read(read_size)
                if len(data) != read_size:
                    raise ValueError(
                        "EOF: reading array data, expected {} bytes got {}"
                        .format(read_size, len(data)))
                flat[i:i+read_count] = np.frombuffer(data, dtype=dt,
                                                     count=read_count)
        if is_fortran:
            # The bytes on disk are column-major: view them with the reversed
            # shape and transpose to recover the stored shape.
            array = flat.reshape(shp[::-1]).transpose()
        else:
            array = flat.reshape(shp)
    if prn:
        print(dedent(frmt).format(mag, shp, (not is_fortran), dt))
    return array
Beispiel #5
0
def save_mpiio(comm, fn, g_kl):
    """
    Write a global two-dimensional array to a single file in the npy format
    using MPI I/O: https://docs.scipy.org/doc/numpy/neps/npy-format.html

    Arrays written with this function can be read with numpy.load.

    Parameters
    ----------
    comm
        MPI communicator.  ``comm.Sub`` is called with two-entry tuples, so
        this is presumably a two-dimensional Cartesian communicator.
    fn : str
        File name.
    g_kl : array
        Portion of the array on this MPI processes.  Must be two-dimensional.
    """
    import struct
    from numpy.lib.format import dtype_to_descr, magic
    magic_str = magic(1, 0)

    local_nx, local_ny = g_kl.shape
    nx = np.empty_like(local_nx)
    ny = np.empty_like(local_ny)

    # Global extents: sum the local extents along each direction of the grid.
    commx = comm.Sub((True, False))
    commy = comm.Sub((False, True))
    commx.Allreduce(np.asarray(local_nx), nx)
    commy.Allreduce(np.asarray(local_ny), ny)

    # ndarray.item() replaces np.asscalar, which newer NumPy no longer has;
    # it yields plain Python ints so the header stays parseable.
    arr_dict_str = str({
        'descr': dtype_to_descr(g_kl.dtype),
        'fortran_order': False,
        'shape': (nx.item(), ny.item())
    })
    # Pad so that magic + 2-byte length + header ends on a 16-byte boundary.
    while (len(arr_dict_str) + len(magic_str) + 2) % 16 != 15:
        arr_dict_str += ' '
    arr_dict_str += '\n'
    header_len = len(arr_dict_str) + len(magic_str) + 2

    # Element offsets of this process' portion within the global array.
    offsetx = np.zeros_like(local_nx)
    commx.Exscan(np.asarray(ny * local_nx), offsetx)
    offsety = np.zeros_like(local_ny)
    commy.Exscan(np.asarray(local_ny), offsety)

    # The sub-communicators are owned by this function; release them so that
    # repeated calls do not exhaust MPI communicator handles.
    commx.Free()
    commy.Free()

    file = MPI.File.Open(comm, fn, MPI.MODE_CREATE | MPI.MODE_WRONLY)
    # Exactly one process of *this* communicator writes the header.
    if comm.Get_rank() == 0:
        file.Write(magic_str)
        # Format 1.0 stores the header length as little-endian unsigned short.
        file.Write(struct.pack('<H', len(arr_dict_str)))
        file.Write(arr_dict_str.encode('latin-1'))
    mpitype = MPI._typedict[g_kl.dtype.char]
    # local_nx blocks of local_ny elements, separated by one global row (ny).
    filetype = mpitype.Create_vector(g_kl.shape[0], g_kl.shape[1], ny)
    filetype.Commit()
    file.Set_view(header_len + (offsety + offsetx) * mpitype.Get_size(),
                  filetype=filetype)
    file.Write_all(g_kl.copy())
    filetype.Free()
    file.Close()
Beispiel #6
0
    def save_mpiio(self, file_name: str, vec: np.ndarray) -> None:
        """
        Write a global two-dimensional array to a single file in the npy format
        using MPI I/O.

        Arrays written with this function can be read with numpy.load.

        Args:
            file_name (str): File name.
            vec (np.ndarray):
            Portion of the array on this MPI processes. This needs to be a
            two-dimensional array.
        """
        import struct

        magic_str = magic(1, 0)
        x_size, y_size = vec.shape

        vx, vy = np.empty_like(x_size), np.empty_like(y_size)

        # Global extents: sum the local extents along each grid direction.
        commx = self.rank_grid.Sub((True, False))
        commy = self.rank_grid.Sub((False, True))
        commx.Allreduce(np.asarray(x_size), vx)
        commy.Allreduce(np.asarray(y_size), vy)

        # ndarray.item() replaces np.asscalar, which newer NumPy no longer
        # has; it yields plain Python ints so the header stays parseable.
        arr_dict_str = str({
            'descr': dtype_to_descr(vec.dtype),
            'fortran_order': False,
            'shape': (vx.item(), vy.item())
        })

        # Pad so that magic + 2-byte length + header ends on a 16-byte
        # boundary.
        while (len(arr_dict_str) + len(magic_str) + 2) % 16 != 15:
            arr_dict_str += ' '
        arr_dict_str += '\n'
        header_len = len(arr_dict_str) + len(magic_str) + 2

        # Element offsets of this process' portion within the global array.
        x_offset = np.zeros_like(x_size)
        commx.Exscan(np.asarray(vy * x_size), x_offset)
        y_offset = np.zeros_like(y_size)
        commy.Exscan(np.asarray(y_size), y_offset)

        # The sub-communicators are owned by this method; release them so
        # that repeated calls do not exhaust MPI communicator handles.
        commx.Free()
        commy.Free()

        file = MPI.File.Open(self.rank_grid, file_name,
                             MPI.MODE_CREATE | MPI.MODE_WRONLY)
        if self.rank == 0:
            file.Write(magic_str)
            # Format 1.0 stores the header length as a little-endian
            # unsigned short.
            file.Write(struct.pack('<H', len(arr_dict_str)))
            file.Write(arr_dict_str.encode('latin-1'))
        mpitype = MPI._typedict[vec.dtype.char]
        # x_size blocks of y_size elements, separated by one global row (vy).
        filetype = mpitype.Create_vector(x_size, y_size, vy)
        filetype.Commit()
        file.Set_view(header_len + (y_offset + x_offset) * mpitype.Get_size(),
                      filetype=filetype)
        file.Write_all(vec.copy())
        filetype.Free()
        file.Close()
Beispiel #7
0
def read_npy(fp, prn=False):
    """ Read an npy file quickly

    fp : string
        The file path: "c:/temp/a01.npy"
    prn : boolean
        obtain full information if True

    Returns:
    --------
    The stored array with its stored shape.  Files written in Fortran order
    are returned as a Fortran-ordered array.

    Raises:
    -------
    ValueError if the format version is neither 1.0 nor 2.0, or if the file
    ends before all of the array data has been read.

    Requires:
    ---------
    from numpy.lib import format

    Notes:
    -------
    shortcut ... np.load("c:/temp/a01.npy")
    """
    frmt = """
    ---- npy reader ---------------------------------------------------------
    File  {}
    Shape {},  C-contig {},  dtype {}
    Magic {}
    -------------------------------------------------------------------------
    """
    with open(fp, 'rb') as f:
        major, minor = format.read_magic(f)
        mag = format.magic(major, minor)
        # Versions 1.0 and 2.0 store the header length with a different
        # width, so each one needs its own header reader.
        if (major, minor) == (1, 0):
            shp, is_fortran, dt = format.read_array_header_1_0(f)
        elif (major, minor) == (2, 0):
            shp, is_fortran, dt = format.read_array_header_2_0(f)
        else:
            raise ValueError(
                "unsupported npy format version {}.{}".format(major, minor))
        count = int(np.multiply.reduce(shp, dtype=np.int64))
        BUFFER_SIZE = 2**18
        flat = np.ndarray(count, dtype=dt)
        # Zero-sized items carry no bytes; skipping the loop also avoids a
        # division by zero when computing the chunk size.
        if dt.itemsize > 0:
            max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, dt.itemsize)
            for i in range(0, count, max_read_count):
                cnt = min(max_read_count, count - i)
                read_size = int(cnt * dt.itemsize)
                data = f.read(read_size)
                if len(data) != read_size:
                    raise ValueError(
                        "EOF: reading array data, expected {} bytes got {}"
                        .format(read_size, len(data)))
                flat[i:i + cnt] = np.frombuffer(data, dtype=dt, count=cnt)
        if is_fortran:
            # The bytes on disk are column-major: view them with the reversed
            # shape and transpose to recover the stored shape.
            array = flat.reshape(shp[::-1]).transpose()
        else:
            array = flat.reshape(shp)
    if prn:
        print(dedent(frmt).format(fp, shp, (not is_fortran), dt, mag))
    return array
    def _read_header(self):
        """Open ``self.fn`` with MPI I/O and parse its npy header.

        On success ``self.file`` is left open and ``self.dtype``,
        ``self.fortran_order`` and ``self.nb_grid_pts`` are set from the
        header dictionary.  On any failure the file is closed (if it was
        opened) and the exception is re-raised.

        Raises an ``MPIFileTypeError`` when the magic prefix is missing or the
        format version is neither 1.0 nor 2.0.
        """
        self.file = None
        try:
            magic_len = len(magic(1, 0))
            self.file = MPI.File.Open(self.comm, self.fn, MPI.MODE_RDONLY)  #
            file_magic = mpi_read_bytes(self.file, magic_len)
            prefix, version = file_magic[:-2], file_magic[-2:]
            if prefix != MAGIC_PREFIX:
                raise MPIFileTypeError(
                    "MAGIC_PREFIX missing at the beginning of file {}".format(
                        self.fn))

            # The two trailing magic bytes select the struct format of the
            # header-length field.
            length_fmt = None
            for known_version, fmt in ((b'\x01\x00', '<H'),
                                       (b'\x02\x00', '<I')):
                if version == known_version:
                    length_fmt = fmt
                    break
            if length_fmt is None:
                raise MPIFileTypeError("Invalid version %r" % version)

            length_bytes = mpi_read_bytes(self.file,
                                          struct.calcsize(length_fmt))
            (header_length,) = struct.unpack(length_fmt, length_bytes)
            raw_header = mpi_read_bytes(self.file, header_length)

            # TODO: Copy from _read_array_header  with all the assertions
            fields = safe_eval(_filter_header(asstr(raw_header)))
            self.dtype = np.dtype(fields['descr'])
            self.fortran_order = fields['fortran_order']
            self.nb_grid_pts = fields['shape']
        except Exception as exc:
            # FIXME! This should be handled through a resource manager
            if self.file is not None:
                self.file.Close()
            raise exc
Beispiel #9
0
def open_memmap(filename, mode='r+', dtype=None, shape=None,
                fortran_order=False, version=(1,0), offset=0):
    """
    Open a .npy file as a memory-mapped array, with offset argument.

    This may be used to read an existing file or create a new one.
    
    :param str filename: The name of the file on disk. This may not be a 
        file-like object.
    :param str mode: The mode to open the file with. In addition to the 
        standard file modes, 'c' is also accepted to mean "copy on write". 
        See `numpy.memmap` for the available mode strings.
    :param dtype dtype: The data type of the array if we are creating a 
        new file in "write" mode.
    :param tuple shape: The shape of the array if we are creating a new 
        file in "write" mode. Shape of (contiguous) slice if opening an 
        existing file.
    :param bool fortran_order: Whether the array should be Fortran-contiguous 
        (True) or C-contiguous (False) if we are creating a new file in 
        "write" mode.
    :param tuple version: If the mode is a "write" mode, then this is the 
        version (major, minor) of the file format used to create the file.
    :param int offset: Number of elements to skip along the first dimension.
    :return numpy.memmap: The memory-mapped array.

    Raises:
    
    * :exc:`ValueError` if the data or the mode is invalid
    * :exc:`IOError` if the file is not found or cannot be opened correctly.
    
    .. seealso:: :func:`numpy.memmap`
    """
    if not isinstance(filename, basestring):
        raise ValueError("Filename must be a string.  Memmap cannot use" \
                         " existing file handles.")

    if 'w' in mode:
        assert offset == 0, "Cannot specify offset when creating memmap"
        # We are creating the file, not reading it.
        # Check if we ought to create the file.
        if version != (1, 0):
            msg = "only support version (1,0) of file format, not %r"
            raise ValueError(msg % (version,))
        # Ensure that the given dtype is an authentic dtype object rather than
        # just something that can be interpreted as a dtype object.
        dtype = np.dtype(dtype)
        if dtype.hasobject:
            msg = "Array can't be memory-mapped: Python objects in dtype."
            raise ValueError(msg)
        d = dict(
            descr=dtype_to_descr(dtype),
            fortran_order=fortran_order,
            shape=shape,
        )
        # If we got here, then it should be safe to create the file.
        fp = open(filename, mode+'b')
        try:
            fp.write(magic(*version))
            write_array_header_1_0(fp, d)
            offset = fp.tell()
        finally:
            fp.close()
    else:
        # Read the header of the file first.
        fp = open(filename, 'rb')
        try:
            version = read_magic(fp)
            if version != (1, 0):
                msg = "only support version (1,0) of file format, not %r"
                raise ValueError(msg % (version,))
            fullshape, fortran_order, dtype = read_array_header_1_0(fp)
            
            if shape:
                length = np.atleast_1d(shape)
                msg = "Specify shape along first dimension only"
                assert length.ndim == 1, msg
            else:
                length = fullshape[0] - offset
            shape = (length,) + fullshape[1:]
            
            if dtype.hasobject:
                msg = "Array can't be memory-mapped: Python objects in dtype."
                raise ValueError(msg)
            
            offset_items = offset * np.prod(fullshape[1:], dtype=int)
            offset_bytes = fp.tell() + offset_items * dtype.itemsize
        finally:
            fp.close()
    
    if fortran_order:
        order = 'F'
    else:
        order = 'C'

    # We need to change a write-only mode to a read-write mode since we've
    # already written data to the file.
    if mode == 'w+':
        mode = 'r+'

    marray = np.memmap(filename, dtype=dtype, shape=shape, order=order,
        mode=mode, offset=offset_bytes)

    return marray
Beispiel #10
0
 def _get_npy_header(h5entry):
     """Return the npy header for ``h5entry.value`` as a string.

     The magic string and the array header are written into an in-memory
     buffer whose contents are returned.
     """
     buf = StringIO()
     # XXX: support only 1.0 version of npy file
     buf.write(magic(1, 0))
     # hdata/hwrite are presumably numpy.lib.format's header helpers for
     # format 1.0 -- TODO confirm against the module imports.
     array_info = hdata(h5entry.value)
     hwrite(buf, array_info)
     return buf.getvalue()
def save_npy(fn,
             data,
             subdomain_locations=None,
             nb_grid_pts=None,
             comm=MPI.COMM_WORLD):
    """
    Write a distributed one- or two-dimensional array to a single npy file
    using MPI I/O.

    Parameters
    ----------
    fn : str
        Name of the file to write.
    data : numpy array
        Data owned by this process.
    subdomain_locations : int or sequence of int, optional
        Index of the first element of `data` within the global data.
        Defaults to the origin.
    nb_grid_pts : int or sequence of int, optional
        Number of grid points of the global data.  Defaults to the shape of
        `data`.
    comm : MPI communicator
        Communicator on which the file is opened.

    Returns
    -------
    None
    """
    import struct
    from numpy.lib.format import dtype_to_descr, magic

    data = np.asarray(data)
    ndims = len(data.shape)

    # One-dimensional data is handled as a single-column 2D array.
    if ndims == 1:
        data = data.reshape((-1, 1))

    if subdomain_locations is None:
        subdomain_locations = (0, 0)
    elif ndims == 1:
        subdomain_locations = (subdomain_locations, 0)
    nb_subdomain_grid_pts = data.shape

    if nb_grid_pts is None:
        nb_grid_pts = nb_subdomain_grid_pts
    elif ndims == 1:
        nb_grid_pts = (nb_grid_pts, 1)

    fortran_order = np.isfortran(data)

    # The header must hold a tuple of plain Python ints: a list, or NumPy
    # integer scalars (whose repr is not a bare number on recent NumPy),
    # would produce a header that numpy.load cannot parse.
    if ndims == 1:
        header_shape = (int(nb_grid_pts[0]), )
    else:
        header_shape = tuple(int(n) for n in nb_grid_pts)

    magic_str = magic(1, 0)
    arr_dict_str = str({
        'descr': dtype_to_descr(data.dtype),
        'fortran_order': fortran_order,
        'shape': header_shape
    })

    # Pad so that magic + 2-byte length + header ends on a 16-byte boundary.
    while (len(arr_dict_str) + len(magic_str) + 2) % 16 != 15:
        arr_dict_str += ' '
    arr_dict_str += '\n'
    header_len = len(arr_dict_str) + len(magic_str) + 2

    file = MPI.File.Open(comm, fn, MPI.MODE_CREATE | MPI.MODE_WRONLY)
    if comm.Get_rank() == 0:
        file.Write(magic_str)
        # Format 1.0 stores the header length as little-endian unsigned short.
        file.Write(struct.pack('<H', len(arr_dict_str)))
        file.Write(arr_dict_str.encode('latin-1'))

    if fortran_order:
        # the returned array will be in fortran_order.
        # the data is loaded in C_contiguous array but in a transposed manner
        # data.transpose() is called which swaps the shapes back again and
        # toggles C-order to F-order
        ix = 1
        iy = 0
    else:
        ix = 0
        iy = 1

    mpitype = MPI._typedict[data.dtype.char]
    # see MPI_TYPE_VECTOR
    filetype = mpitype.Create_vector(
        # number of blocks: length of data in the non-contiguous direction
        nb_subdomain_grid_pts[ix],
        # length of block: length of data in the contiguous direction
        nb_subdomain_grid_pts[iy],
        # stepsize: the data is contiguous in y direction, two matrix
        # elements with the same x position are separated by ny in memory
        nb_grid_pts[iy]
    )

    filetype.Commit()  # verification if type is OK
    file.Set_view(
        header_len +
        (subdomain_locations[ix] * nb_grid_pts[iy] + subdomain_locations[iy]) *
        mpitype.Get_size(),
        filetype=filetype)
    if fortran_order:
        data = data.transpose()
    file.Write_all(data.copy())  # TODO: is the copy needed ?
    filetype.Free()

    file.Close()