Exemple #1
0
    def _init_from_array(self, array):
        """Initialize the object from an array.

        Sets the the header_length so large that it is possible to append to the array.

        Returns
        -------
        h_bytes : io.BytesIO
            Contains the oversized header bytes

        """
        self.shape = (0, ) + array.shape[1:]
        self.dtype = array.dtype
        self.itemsize = array.itemsize

        # Read header data from array and set modify it to be large for the length
        # 1_0 is the same for 2_0
        d = npformat.header_data_from_array_1_0(array)
        d['shape'] = (self.MAX_SHAPE_LEN, ) + d['shape'][1:]
        d['fortran_order'] = False

        # Write a prefix for a very long array to make it large enough for appending new
        # data
        h_bytes = io.BytesIO()
        npformat.write_array_header_2_0(h_bytes, d)
        self.header_length = h_bytes.tell()

        # Write header prefix to file
        self.fs.seek(0)
        h_bytes.seek(0)
        self.fs.write(h_bytes.read(self.HEADER_DATA_OFFSET))

        # Write header data for the zero length to make it a valid file
        self._prepare_header_data()
        self._write_header_data()
Exemple #2
0
    def _prepare_header_data(self):
        # Make header data
        d = {
            'shape': self.shape,
            'fortran_order': self.fortran_order,
            'descr': npformat.dtype_to_descr(self.dtype)
        }

        h_bytes = io.BytesIO()
        npformat.write_array_header_2_0(h_bytes, d)

        # Pad the end of the header
        fill_len = self.header_length - h_bytes.tell()
        if fill_len < 0:
            raise OverflowError(
                "File {} cannot be appended. The header is too short.".format(
                    self.filename))
        elif fill_len > 0:
            h_bytes.write(b'\x20' * fill_len)

        h_bytes.seek(0)
        self._header_bytes_to_write = h_bytes.read()
Exemple #3
0
def _skip_npy_header(stream):
    """Position ``stream`` at the first data byte of an ``.npy`` file.

    The header size is read from the file itself instead of being assumed:
    the preamble is the 6-byte magic string followed by two version bytes
    and a little-endian header length (2 bytes for format 1.x, 4 bytes for
    later versions).

    Raises
    ------
    ValueError
        If the stream does not start with a complete ``.npy`` preamble.
    """
    source = getattr(stream, "name", stream)
    preamble = stream.read(8)
    if len(preamble) != 8 or not preamble.startswith(b"\x93NUMPY"):
        raise ValueError("Not a valid .npy file: {}".format(source))

    field_size = 2 if preamble[6] == 1 else 4
    length_field = stream.read(field_size)
    if len(length_field) != field_size:
        raise ValueError("Truncated .npy header: {}".format(source))

    # whence=1: skip the header text relative to the current position.
    stream.seek(int.from_bytes(length_field, "little"), 1)


def concat(files, output, force, type, axis):
    """Concatenate ``files`` byte-wise into ``output``.

    Parameters
    ----------
    files : iterable of path-like
        Input files, copied in the given order.
    output : path-like
        Destination file.
    force : bool
        Allow an existing ``output`` file to be overwritten.
    type : str
        When equal to ``"npy"`` a new format 2.0 header obtained from
        ``get_header(files, axis)`` is written first and the header of
        every input file is skipped.  Any other value copies the files
        unchanged.
    axis : int
        Forwarded to ``get_header``.

    Raises
    ------
    FileNotFoundError
        If any input file does not exist.
    FileExistsError
        If ``output`` exists and ``force`` is false.
    ValueError
        If ``output`` is also one of the inputs, or if an input is not a
        valid ``.npy`` file while ``type`` is ``"npy"``.
    """
    output = Path(output)
    files = [Path(file) for file in files]
    if any(not file.is_file() for file in files):
        raise FileNotFoundError("One or more files not found")
    if output.is_file() and not force:
        raise FileExistsError("Use -f to overwrite existing file")

    # Opening the output truncates it, which would destroy an input that
    # has not been read yet.
    target = output.resolve()
    if any(file.resolve() == target for file in files):
        raise ValueError("Output file must not be one of the input files")

    print("Concatenating")
    for idx, file in enumerate(files):
        print("\t", idx + 1, file.name)
    print("Into", "\n\t", output.name)

    is_npy = type == "npy"
    # Build the merged header before touching the output so that a failure
    # here does not leave an existing output file truncated.
    header = get_header(files, axis) if is_npy else None

    with open(output, "wb") as outputfile:
        if is_npy:
            fmt.write_array_header_2_0(outputfile, header)
        for file in files:
            with open(file, "rb") as inputfile:
                if is_npy:
                    # Header sizes differ between files; never assume 128.
                    _skip_npy_header(inputfile)
                shutil.copyfileobj(inputfile, outputfile)

    print("Concatenation complete")
Exemple #4
0
def save(file_name, array, axis, full_shape=None, mpi_comm=MPI.COMM_WORLD):
    """
    Save a numpy array from parallel jobs in the MPI communicator.
    The array is gathered along the chosen dimension.

    The root process writes the ``.npy`` header (format 1.0, falling back
    to 2.0 when 1.0 raises ``ValueError``); afterwards every process writes
    its own part of the data through a collective MPI file write.

    Parameters
    ----------
    file_name : str
        The numpy array file to write.
    array : numpy.ndarray
        The distributed array.
    axis : int
        The axis on which to distribute the array.
    full_shape : tuple(int), optional
        The size of the full array, by default None. When None it is
        obtained with ``gather_full_shape``.
    mpi_comm : mpi4py.MPI.Comm, optional
        The MPI communicator used to distribute, by default MPI.COMM_WORLD.
    """

    if full_shape is None:
        full_shape = gather_full_shape(array, axis, mpi_comm)

    axis = utils.positive_index(axis, len(full_shape))

    header_offset = None
    if is_root_process(mpi_comm):
        header_dict = {
            'shape': full_shape,
            'fortran_order': False,
            'descr': npformat.dtype_to_descr(array.dtype)
        }

        with open(file_name, 'wb') as fp:
            try:
                npformat.write_array_header_1_0(fp, header_dict)
            except ValueError:
                npformat.write_array_header_2_0(fp, header_dict)

            # The array data starts right after the header.
            header_offset = fp.tell()
    # Every process needs the data offset that only the root knows.
    header_offset = mpi_comm.bcast(header_offset, root=0)

    i_start, bin_size = distribute_mpi(full_shape[axis], mpi_comm)

    slice_type = create_slice_view(axis,
                                   bin_size,
                                   shape=full_shape,
                                   dtype=array.dtype)
    slice_type.Commit()
    try:
        single_slice_extent = slice_type.extent
        if bin_size != 0:
            # Floor division keeps the byte offset an integer; true division
            # would turn the displacement passed to Set_view into a float.
            single_slice_extent //= bin_size

        displacement = header_offset + i_start * single_slice_extent
        base_type = to_mpi_datatype(array.dtype)

        fh = MPI.File.Open(mpi_comm, file_name,
                           MPI.MODE_WRONLY | MPI.MODE_APPEND)
        try:
            fh.Set_view(displacement, filetype=slice_type)
            fh.Write_all([array, array.size, base_type])
        finally:
            # Close the file handle even when the write fails.
            fh.Close()
    finally:
        # Release the committed datatype on every exit path.
        slice_type.Free()