Example #1
0
def arrayfile(data_file, shape, descr, fortran=False):
    '''
    Returns an array that is memory-mapped to an NPY (v1.0) file

    The v1.0 array header is written to `data_file` at its current position
    and the returned array maps the bytes that immediately follow it.

    Arguments
    ---------
    data_file :
        a file-like object opened with write mode compatible to NumPy's
        memory-mapped array types (see `numpy.memmap`). It is responsibility of
        the caller to close the file.

    shape : tuple
        shape of the ndarray.

    descr : str
        a typecode str (see `array` of `numpy.dtype`). Will be converted to a
        NumPy dtype.

    fortran : bool
        optional; if True, the array uses Fortran data order. Default: use C
        order.

    Returns
    -------
    numpy.memmap
        array of the requested shape, dtype and memory order backed by
        `data_file`.
    '''
    # Imported under an alias so the builtin `format` is not shadowed.
    from numpy.lib import format as npy_format

    header = {'descr': descr, 'fortran_order': fortran, 'shape': shape}
    npy_format.write_array_header_1_0(data_file, header)

    # The data region starts wherever the header ended.  Asking the file for
    # its position is correct even when the caller already wrote something
    # before the header, and avoids serialising the header a second time into
    # a scratch buffer just to measure it.
    offset = data_file.tell()

    # Binary file objects may report modes such as 'rb+' or 'w+b', while
    # np.memmap only understands the plain spellings ('r+', 'w+', ...).
    mode = data_file.mode.replace('b', '')

    # The mapped layout must match the order advertised in the header.
    order = 'F' if fortran else 'C'

    return np.memmap(data_file,
                     dtype=np.dtype(descr),
                     mode=mode,
                     shape=shape,
                     offset=offset,
                     order=order)
Example #2
0
def test_bad_header():
    """Malformed 1.0 headers must make read_array_header_1_0 raise ValueError."""
    read_header = format.read_array_header_1_0

    # header of length less than 2 should fail
    assert_raises(ValueError, read_header, BytesIO())

    # a single byte is still too short; a two-byte length prefix with no
    # header behind it is shorter than the size it announces
    for payload in ('1', '\x01\x00'):
        stream = BytesIO(asbytes(payload))
        assert_raises(ValueError, read_header, stream)

    # headers without the exact keys required should fail
    missing_key = {"shape": (1, 2),
                   "descr": "x"}
    surplus_key = {"shape": (1, 2),
                   "fortran_order": False,
                   "descr": "x",
                   "extrakey": -1}
    for header in (missing_key, surplus_key):
        stream = BytesIO()
        format.write_array_header_1_0(stream, header)
        # NOTE(review): the stream is not rewound after the write, so the
        # read below starts at the end of the buffer -- confirm that the
        # ValueError really comes from the key check and not from a short
        # read.
        assert_raises(ValueError, read_header, stream)
Example #3
0
def test_large_header():
    """A small header is written; an oversized one raises ValueError."""
    # Two short entries: the write is expected to succeed.
    small = {'a': 1, 'b': 2}
    format.write_array_header_1_0(BytesIO(), small)

    # Adding a 256*256-character value must make the write fail.
    oversized = {'a': 1, 'b': 2, 'c': 'x'*256*256}
    assert_raises(ValueError, format.write_array_header_1_0,
                  BytesIO(), oversized)
Example #4
0
def test_large_header():
    """Check write_array_header_1_0 with a small and an oversized header."""
    write_header = format.write_array_header_1_0

    # Baseline: a header with two tiny entries is written without error.
    sink = StringIO()
    write_header(sink, dict(a=1, b=2))

    # Same entries plus a 65536-character string: ValueError is expected.
    filler = "x" * (256 * 256)
    assert_raises(ValueError, write_header,
                  StringIO(), dict(a=1, b=2, c=filler))
Example #5
0
def test_large_header():
    """write_array_header_1_0 accepts a tiny header and rejects a huge one."""
    base = {"a": 1, "b": 2}
    format.write_array_header_1_0(BytesIO(), base)

    # Extend a copy of the tiny header with one very long string value; the
    # write is then expected to raise ValueError.
    too_big = dict(base)
    too_big["c"] = "x" * 256 * 256
    assert_raises(ValueError, format.write_array_header_1_0,
                  BytesIO(), too_big)
Example #6
0
def _npy_size(ary):
    """Return MAGIC_LEN + serialized 1.0 header length + raw data length.

    The header length is measured by writing the header for `ary` into a
    throw-away in-memory buffer and reading back the buffer position.
    Arrays whose dtype contains Python objects are not supported.
    """
    assert not ary.dtype.hasobject

    # TODO: could calculate this directly
    header_info = npy.header_data_from_array_1_0(ary)
    with closing(StringIO()) as scratch:
        npy.write_array_header_1_0(scratch, header_info)
        header_bytes = scratch.tell()

    payload_bytes = ary.dtype.itemsize * ary.size
    return npy.MAGIC_LEN + header_bytes + payload_bytes
Example #7
0
def write_localarray(fp, arr, version=(1, 0)):
    """
    Write a LocalArray to a .dap file, including a header.

    Three things are written to `fp`, in this order: the magic string for
    `version`, a header holding the ``__version__`` and ``dim_data`` entries
    of the array's Distributed Array Protocol structure, and finally the
    ``buffer`` entry via ``numpy.save``.

    Parameters
    ----------
    fp : file_like object
        An open, writable file object, or similar object with a ``.write()``
        method.
    arr : LocalArray
        The array to write to disk.
    version : (int, int), optional
        The version number of the file format.  Default: (1, 0)

    Raises
    ------
    ValueError
        If `version` is anything other than (1, 0), or if the array cannot
        be persisted.
    Various other errors
        If the underlying numpy array contains Python objects as part of its
        dtype, the process of pickling them may raise various errors if the
        objects are not picklable.

    """
    if version != (1, 0):
        raise ValueError(
            "Only version (1, 0) is supported, not %s." % (version,))

    # The magic string goes out before the array is asked for its protocol
    # structure.
    fp.write(magic(*version))

    protocol = arr.__distarray__()
    header_fields = ('__version__', 'dim_data')
    metadata = dict((field, protocol[field]) for field in header_fields)

    write_array_header_1_0(fp, metadata)
    np.save(fp, protocol['buffer'])
Example #8
0
def save(file_name, array, axis, full_shape=None, mpi_comm=MPI.COMM_WORLD):
    """
    Save a numpy array from parallel jobs in the MPI communicator.
    The array is gathered along the chosen dimension.

    The root process creates the file and writes the ``.npy`` header
    (version 1.0, falling back to 2.0 when the 1.0 writer raises
    ``ValueError``).  The resulting header size is broadcast, and every
    process then writes its own part of the data through one collective
    MPI-IO write.

    Parameters
    ----------
    file_name : str
        The numpy array file to write.
    array : numpy.ndarray
        The distributed array.
    axis : int
        The axis on which to distribute the array.
    full_shape : tuple(int), optional
        The size of the full array, by default None.  When None it is
        obtained with ``gather_full_shape``.
    mpi_comm : mpi4py.MPI.Comm, optional
        The MPI communicator used to distribute, by default MPI.COMM_WORLD.
    """

    if full_shape is None:
        full_shape = gather_full_shape(array, axis, mpi_comm)

    axis = utils.positive_index(axis, len(full_shape))

    header_offset = None
    if is_root_process(mpi_comm):
        header_dict = {
            'shape': full_shape,
            'fortran_order': False,
            'descr': npformat.dtype_to_descr(array.dtype)
        }

        with open(file_name, 'wb') as fp:
            try:
                npformat.write_array_header_1_0(fp, header_dict)
            except ValueError:
                npformat.write_array_header_2_0(fp, header_dict)

            header_offset = fp.tell()
    # Every rank needs the header size to position its own write.
    header_offset = mpi_comm.bcast(header_offset, root=0)

    # presumably (first index, element count) of this rank's share along
    # `axis` -- verify against distribute_mpi
    i_start, bin_size = distribute_mpi(full_shape[axis], mpi_comm)

    slice_type = create_slice_view(axis,
                                   bin_size,
                                   shape=full_shape,
                                   dtype=array.dtype)
    slice_type.Commit()
    try:
        single_slice_extent = slice_type.extent
        if bin_size != 0:
            # Floor division keeps the value (and the displacement derived
            # from it) an integer under Python 3's true-division semantics.
            single_slice_extent //= bin_size

        displacement = header_offset + i_start * single_slice_extent
        base_type = to_mpi_datatype(array.dtype)

        fh = MPI.File.Open(mpi_comm, file_name,
                           MPI.MODE_WRONLY | MPI.MODE_APPEND)
        try:
            fh.Set_view(displacement, filetype=slice_type)
            fh.Write_all([array, array.size, base_type])
        finally:
            # Close the handle even if setting the view or writing fails.
            fh.Close()
    finally:
        # The committed datatype is released on every exit path.
        slice_type.Free()
Example #9
0
def open_memmap(filename, mode='r+', dtype=None, shape=None,
                fortran_order=False, version=(1,0), offset=0):
    """
    Open a .npy file as a memory-mapped array, with offset argument.

    This may be used to read an existing file or create a new one.  When
    reading, `offset` and `shape` select a contiguous run of entries along
    the first dimension of the stored array.

    :param str filename: The name of the file on disk. This may not be a
        file-like object.
    :param str mode: The mode to open the file with. In addition to the
        standard file modes, 'c' is also accepted to mean "copy on write".
        See `numpy.memmap` for the available mode strings.
    :param dtype dtype: The data type of the array if we are creating a
        new file in "write" mode.
    :param tuple shape: The shape of the array if we are creating a new
        file in "write" mode. When opening an existing file: the number of
        entries to map along the first dimension (an int or a one-element
        sequence); if omitted, everything from `offset` to the end is mapped.
    :param bool fortran_order: Whether the array should be Fortran-contiguous
        (True) or C-contiguous (False) if we are creating a new file in
        "write" mode. Ignored when reading: the value from the file header
        is used.
    :param tuple version: If the mode is a "write" mode, then this is the
        version (major, minor) of the file format used to create the file.
    :param int offset: Number of elements to skip along the first dimension.
        Must be 0 when creating a file.
    :return numpy.memmap: The memory-mapped array.

    Raises:

    * :exc:`ValueError` if the data or the mode is invalid, if the requested
      rows do not fit inside the stored array, or if a partial slice of a
      Fortran-ordered file is requested
    * :exc:`IOError` if the file is not found or cannot be opened correctly.

    .. seealso:: :func:`numpy.memmap`
    """
    # `basestring` only exists on Python 2; fall back to `str` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str
    if not isinstance(filename, string_types):
        raise ValueError("Filename must be a string.  Memmap cannot use"
                         " existing file handles.")

    if 'w' in mode:
        assert offset == 0, "Cannot specify offset when creating memmap"
        # We are creating the file, not reading it.
        # Check if we ought to create the file.
        if version != (1, 0):
            msg = "only support version (1,0) of file format, not %r"
            raise ValueError(msg % (version,))
        # Ensure that the given dtype is an authentic dtype object rather than
        # just something that can be interpreted as a dtype object.
        dtype = np.dtype(dtype)
        if dtype.hasobject:
            msg = "Array can't be memory-mapped: Python objects in dtype."
            raise ValueError(msg)
        d = dict(
            descr=dtype_to_descr(dtype),
            fortran_order=fortran_order,
            shape=shape,
        )
        # If we got here, then it should be safe to create the file.
        with open(filename, mode+'b') as fp:
            # NOTE(review): the magic string is written explicitly, which
            # assumes write_array_header_1_0 emits only the length-prefixed
            # header -- confirm for the NumPy version in use.
            fp.write(magic(*version))
            write_array_header_1_0(fp, d)
            # The array data starts right after the header.  This must be
            # stored in `offset_bytes`, the name used by the np.memmap call
            # at the end of the function.
            offset_bytes = fp.tell()
    else:
        # Read the header of the file first.
        with open(filename, 'rb') as fp:
            version = read_magic(fp)
            if version != (1, 0):
                msg = "only support version (1,0) of file format, not %r"
                raise ValueError(msg % (version,))
            fullshape, fortran_order, dtype = read_array_header_1_0(fp)
            data_start = fp.tell()

        if dtype.hasobject:
            msg = "Array can't be memory-mapped: Python objects in dtype."
            raise ValueError(msg)

        if shape:
            length = np.atleast_1d(shape)
            msg = "Specify shape along first dimension only"
            assert length.ndim == 1, msg
            if length.size != 1:
                raise ValueError(msg)
            # Use a plain int so the final shape is a tuple of ints rather
            # than a tuple holding an ndarray.
            length = int(length[0])
        else:
            length = fullshape[0] - offset

        # Refuse windows that reach outside the stored array; in 'r+' mode
        # np.memmap would otherwise grow the file to satisfy the request.
        if offset < 0 or length < 0 or offset + length > fullshape[0]:
            msg = ("Requested rows [%d, %d) are outside the stored array, "
                   "which has %d entries along the first dimension")
            raise ValueError(msg % (offset, offset + length, fullshape[0]))

        # In Fortran order the first axis varies fastest, so a subset of it
        # is not one contiguous byte range and cannot be mapped this way.
        if fortran_order and (offset != 0 or length != fullshape[0]):
            raise ValueError("Cannot map a partial slice along the first "
                             "dimension of a Fortran-ordered array")

        shape = (length,) + tuple(fullshape[1:])

        # Each step along the first axis skips one full block of the
        # trailing dimensions.
        items_per_entry = int(np.prod(fullshape[1:], dtype=int))
        offset_bytes = data_start + offset * items_per_entry * dtype.itemsize

    order = 'F' if fortran_order else 'C'

    # We need to change a write-only mode to a read-write mode since we've
    # already written data to the file.
    if mode == 'w+':
        mode = 'r+'

    marray = np.memmap(filename, dtype=dtype, shape=shape, order=order,
        mode=mode, offset=offset_bytes)

    return marray