def _init_from_array(self, array):
    """Initialize this object from an existing array.

    The npy header is first serialized with the leading shape dimension
    inflated to ``MAX_SHAPE_LEN``, so ``self.header_length`` reserves enough
    room for the header to be rewritten in place as rows are appended later.
    Only the fixed-size prefix of that oversized header is written to the
    file; a real header for the current (empty) array follows, keeping the
    file valid at all times.
    """
    self.shape = (0,) + array.shape[1:]
    self.dtype = array.dtype
    self.itemsize = array.itemsize

    # Derive header metadata from the array, then inflate the first shape
    # dimension so the serialized header is as long as it can ever become.
    # (header_data_from_array_1_0 yields the same dict for format 2.0.)
    header = npformat.header_data_from_array_1_0(array)
    header['shape'] = (self.MAX_SHAPE_LEN,) + header['shape'][1:]
    header['fortran_order'] = False

    # Serialize the oversized header once, purely to learn its total length.
    oversized = io.BytesIO()
    npformat.write_array_header_2_0(oversized, header)
    self.header_length = oversized.tell()

    # Copy the fixed-size prefix (magic, version, header-size field) of the
    # oversized header to the start of the file.
    self.fs.seek(0)
    oversized.seek(0)
    self.fs.write(oversized.read(self.HEADER_DATA_OFFSET))

    # Follow with header data describing the current zero-length array so
    # the file is a valid npy file immediately.
    self._prepare_header_data()
    self._write_header_data()
def _prepare_header_data(self):
    """Serialize the current header into ``self._header_bytes_to_write``.

    The header (shape, fortran order, dtype descriptor) is rendered in npy
    format 2.0 and space-padded up to the reserved ``self.header_length``
    so it can be written over the existing on-disk header in place.

    Raises
    ------
    OverflowError
        If the serialized header no longer fits in the reserved space.
    """
    header = {
        'shape': self.shape,
        'fortran_order': self.fortran_order,
        'descr': npformat.dtype_to_descr(self.dtype),
    }
    buf = io.BytesIO()
    npformat.write_array_header_2_0(buf, header)

    # Pad with spaces (0x20) so the header occupies exactly the space that
    # was reserved when the file was created.
    padding = self.header_length - buf.tell()
    if padding < 0:
        raise OverflowError(
            "File {} cannot be appended. The header is too short.".format(
                self.filename))
    if padding:
        buf.write(b'\x20' * padding)

    buf.seek(0)
    self._header_bytes_to_write = buf.read()
def concat(files, output, force, type, axis):
    """Concatenate ``files`` into ``output``.

    Parameters
    ----------
    files : list of str or pathlib.Path
        Input files, concatenated in the given order.
    output : str or pathlib.Path
        Destination file.
    force : bool
        Overwrite ``output`` if it already exists.
    type : str
        ``"npy"`` to treat inputs as numpy files (a combined header from
        ``get_header`` is written once and each input's own header is
        skipped); any other value copies raw bytes verbatim.
        (Name shadows the builtin but is kept for caller compatibility.)
    axis : int
        Axis along which npy arrays are concatenated (forwarded to
        ``get_header``).

    Raises
    ------
    FileNotFoundError
        If any input file is missing.
    FileExistsError
        If ``output`` exists and ``force`` is not set.
    """
    output = Path(output)
    files = [Path(file) for file in files]
    if any(not file.is_file() for file in files):
        raise FileNotFoundError("One or more files not found")
    if output.is_file() and not force:
        raise FileExistsError("Use -f to overwrite existing file")
    print("Concatenating")
    for idx, file in enumerate(files):
        print("\t", idx + 1, file.name)
    print("Into", "\n\t", output.name)
    with open(output, "wb") as outputfile:
        if type == "npy":
            fmt.write_array_header_2_0(outputfile, get_header(files, axis))
        for file in files:
            with open(file, "rb") as inputfile:
                if type == "npy":
                    # BUGFIX: the header was skipped with a hard-coded
                    # seek(128); npy headers are only 128 bytes for small
                    # version-1.0 headers. Parse the magic string and header
                    # instead, which leaves the stream positioned exactly at
                    # the start of the array data for any header size.
                    major, _minor = fmt.read_magic(inputfile)
                    if major >= 2:
                        fmt.read_array_header_2_0(inputfile)
                    else:
                        fmt.read_array_header_1_0(inputfile)
                shutil.copyfileobj(inputfile, outputfile)
    print("Concatenation complete")
def save(file_name, array, axis, full_shape=None, mpi_comm=MPI.COMM_WORLD):
    """
    Save a numpy array from parallel jobs in the MPI communicator.

    The array is gathered along the chosen dimension: each rank writes its
    own slab of the file with collective MPI-IO, after the root rank has
    written the npy header.

    Parameters
    ----------
    file_name : str
        The path of the numpy file to write.
    array : numpy.ndarray
        The distributed array (this rank's local piece).
    axis : int
        The axis on which to distribute the array.
    full_shape : tuple(int), optional
        The size of the full array, by default None (gathered from all
        ranks via ``gather_full_shape``).
    mpi_comm : mpi4py.MPI.Comm, optional
        The MPI communicator used to distribute, by default MPI.COMM_WORLD.
    """
    if full_shape is None:
        full_shape = gather_full_shape(array, axis, mpi_comm)
    # Normalize a possibly-negative axis to a non-negative index.
    axis = utils.positive_index(axis, len(full_shape))
    header_offset = None
    if is_root_process(mpi_comm):
        # Only the root rank writes the npy header (truncating any existing
        # file via 'wb'); other ranks learn the data offset from the bcast
        # below.
        header_dict = {
            'shape': full_shape,
            'fortran_order': False,
            'descr': npformat.dtype_to_descr(array.dtype)
        }
        with open(file_name, 'wb') as fp:
            try:
                npformat.write_array_header_1_0(fp, header_dict)
            except ValueError:
                # Header too large for the 1.0 format; fall back to 2.0,
                # which allows longer headers.
                npformat.write_array_header_2_0(fp, header_dict)
            header_offset = fp.tell()
    # Collective: every rank must reach this bcast (root sends the offset,
    # all others receive it).
    header_offset = mpi_comm.bcast(header_offset, root=0)
    # This rank's starting index and number of slices along `axis`.
    i_start, bin_size = distribute_mpi(full_shape[axis], mpi_comm)
    slice_type = create_slice_view(axis, bin_size, shape=full_shape,
                                   dtype=array.dtype)
    slice_type.Commit()
    single_slice_extent = slice_type.extent
    if bin_size != 0:
        # extent covers bin_size slices; reduce to one slice so the
        # displacement below can be scaled by this rank's start index.
        # NOTE(review): assumes extent divides evenly by bin_size — the
        # result of /= here is a float; verify MPI accepts it as a
        # displacement.
        single_slice_extent /= bin_size
    displacement = header_offset + i_start * single_slice_extent
    base_type = to_mpi_datatype(array.dtype)
    # Collective open/write: all ranks participate, each viewing the file
    # through its own slice_type at its own displacement.
    fh = MPI.File.Open(mpi_comm, file_name,
                       MPI.MODE_WRONLY | MPI.MODE_APPEND)
    fh.Set_view(displacement, filetype=slice_type)
    fh.Write_all([array, array.size, base_type])
    fh.Close()
    slice_type.Free()