def arange(self, start_in, shape, block_shape, step=1, dtype=None) -> BlockArray:
    """Build a block array of consecutive values beginning at ``start_in``.

    Only the first axis of ``shape`` / ``block_shape`` is used to compute the
    per-block ranges, so the array is effectively one-dimensional.

    Args:
        start_in: First value of the range.
        shape: Shape of the resulting array; ``shape[0]`` is its length.
        block_shape: Shape of each block; ``block_shape[0]`` is the number of
            elements per (full) block.
        step: Spacing between values. Only ``1`` is supported.
        dtype: Scalar type of the result (must expose ``__name__``). When
            ``None`` it is inferred with ``np.result_type`` from ``start_in``
            and ``shape[0] + start_in``.

    Returns:
        A ``BlockArray`` whose blocks each hold the object id returned by
        ``self.cm.arange`` for that block's ``[start, stop)`` sub-range.

    Raises:
        AssertionError: If ``step`` is not ``1``.
    """
    assert step == 1, "arange only supports step == 1"
    if dtype is None:
        # Resolve the inferred dtype to the NumPy scalar type of the same
        # name (e.g. "int64" -> np.int64) so that ``__name__`` is available.
        dtype = getattr(np, str(np.result_type(start_in, shape[0] + start_in)))

    grid = ArrayGrid(shape, block_shape, dtype.__name__)
    rarr = BlockArray(grid, self.cm)

    # Generate ranges per block.
    for grid_entry in grid.get_entry_iterator():
        syskwargs = {
            "grid_entry": grid_entry,
            "grid_shape": grid.grid_shape,
        }
        # Offset of this block within the global range.
        start = start_in + block_shape[0] * grid_entry[0]
        # The grid reports the actual size of this block, which may differ
        # from block_shape[0] (e.g. a shorter trailing block).
        stop = start + grid.get_block_shape(grid_entry)[0]
        rarr.blocks[grid_entry].oid = self.cm.arange(
            start, stop, step, dtype, syskwargs=syskwargs)
    return rarr
def loadtxt(self, fname, dtype=float, comments='# ', delimiter=' ',
            converters=None, skiprows=0, usecols=None, unpack=False,
            ndmin=0, encoding='bytes', max_rows=None, num_workers=4) -> BlockArray:
    """Load a text file into a block array, one row-batch per block.

    The number of data rows is derived from the file size rather than by
    parsing the file: the character count of comment lines and trailing
    newlines is subtracted from the total and the remainder is divided by the
    per-row character count. This requires every data row to occupy the same
    number of characters; the assertions below verify the divisions are exact.

    Args:
        fname: Path of the text file to read.
        dtype: Element type; ``float`` is recorded in the grid as ``float64``.
        comments: Comment marker, forwarded to the storage helpers and to
            ``self.loadtxt_block``.
        delimiter: Column delimiter, forwarded likewise.
        converters, usecols, unpack, ndmin, encoding: Forwarded unchanged to
            ``self.loadtxt_block`` (presumably with ``numpy.loadtxt``
            semantics -- confirm against ``loadtxt_block``).
        skiprows: Number of leading rows to skip.
        max_rows: Optional upper bound on the number of rows loaded after
            ``skiprows``.
        num_workers: Number of row batches (and therefore blocks along the
            first axis) to split the rows into.

    Returns:
        A ``BlockArray`` of shape ``(rows, num_cols)`` whose blocks hold the
        object ids returned by ``self.loadtxt_block``.

    Raises:
        AssertionError: If the file layout is not consistent with fixed-width
            rows, or a batch size disagrees with the grid's block shape.
    """
    # pylint: disable=unused-variable
    bytes_per_char, bytes_per_row, bytes_per_col, num_cols = \
        storage_utils.get_np_txt_info(fname, comments, delimiter)
    chars_per_row = bytes_per_row // bytes_per_char
    assert np.allclose(float(chars_per_row), bytes_per_row / bytes_per_char)

    # Characters that do not belong to data rows: comment lines plus trailing
    # newlines at the end of the file.
    comment_lines, trailing_newlines = storage_utils.get_np_comments(
        fname, comments)
    nonrow_chars = trailing_newlines + sum(len(line) for line in comment_lines)

    file_size = storage_utils.get_file_size(fname)
    file_chars = file_size // bytes_per_char
    assert np.allclose(float(file_chars), file_size / bytes_per_char)

    row_chars = file_chars - nonrow_chars
    num_rows = row_chars // chars_per_row
    assert np.allclose(float(num_rows), float(row_chars / chars_per_row))

    num_rows_final = num_rows - skiprows
    if max_rows is not None:
        # Cap the row count at max_rows. (Previously this built a tuple
        # ``(num_rows_final, max_rows)``, which broke batching and the grid
        # shape whenever max_rows was supplied.)
        num_rows_final = min(num_rows_final, max_rows)

    row_batches: storage_utils.Batch = storage_utils.Batch.from_num_batches(
        num_rows_final, num_workers)
    grid = ArrayGrid(
        shape=(num_rows_final, num_cols),
        block_shape=(row_batches.batch_size, num_cols),
        dtype=np.float64.__name__ if dtype is float else dtype.__name__)
    result: BlockArray = BlockArray(grid, cm=self.cm)

    for i, grid_entry in enumerate(grid.get_entry_iterator()):
        row_start, row_end = row_batches.batches[i]
        # Each block skips the globally skipped rows plus all rows that
        # belong to earlier batches, then reads exactly its own rows.
        batch_skiprows = skiprows + row_start
        batch_max_rows = grid.get_block_shape(grid_entry)[0]
        assert batch_max_rows == row_end - row_start
        result.blocks[grid_entry].oid = self.loadtxt_block(
            fname,
            dtype=dtype,
            comments=comments,
            delimiter=delimiter,
            converters=converters,
            skiprows=batch_skiprows,
            usecols=usecols,
            unpack=unpack,
            ndmin=ndmin,
            encoding=encoding,
            max_rows=batch_max_rows,
            syskwargs={
                "grid_entry": grid_entry,
                "grid_shape": grid.grid_shape,
            })
    return result