def test_configure_seen_column(self):
    """Configuring an already-appended column reaches the C++ writer at once."""
    writer = self.writer
    writer.append({'x': 3, 'y': 2})

    # 'x' was part of the step appended above, so it is already known.
    writer.configure(('x',), num_keep_alive_refs=2, max_chunk_length=1)

    # An integer max_chunk_length is expected to yield constant-chunker
    # options; the test expects column 'x' to be addressed as index 0.
    expected_options = pybind.ConstantChunkerOptions(
        num_keep_alive_refs=2, max_chunk_length=1)
    configure_chunker = self.cpp_writer_mock.ConfigureChunker
    configure_chunker.assert_called_with(0, expected_options)
def test_configure_unseen_column(self):
    """Configuring an unknown column is held back until that column appears."""
    writer = self.writer
    configure_chunker = self.cpp_writer_mock.ConfigureChunker

    writer.append({'x': 3, 'y': 2})
    writer.configure(('z',), num_keep_alive_refs=2, max_chunk_length=1)

    # 'z' has never been appended, so nothing may reach the C++ writer yet.
    configure_chunker.assert_not_called()

    # Appending a different new column must not trigger the pending config.
    writer.append({'a': 4})
    configure_chunker.assert_not_called()

    # Once 'z' shows up the stored options are applied. The expected index 3
    # presumably reflects first-seen order (x, y, a, z) -- TODO confirm.
    writer.append({'z': 5})
    expected_options = pybind.ConstantChunkerOptions(
        num_keep_alive_refs=2, max_chunk_length=1)
    configure_chunker.assert_called_with(3, expected_options)
def configure(self,
              path: Tuple[Union[int, str], ...],
              *,
              num_keep_alive_refs: int,
              max_chunk_length: Optional[int]):
    """Set per-column chunking options, overriding the defaults.

    If `path` is already present in `self._path_to_column_index`, the options
    are passed straight to the underlying writer. Otherwise they are stored in
    `self._path_to_column_config` under `path` (presumably to be applied once
    the column is first observed -- that logic lives outside this method).

    Args:
      path: Structured path identifying the column to configure.
      num_keep_alive_refs: Size of the circular buffer holding the most
        recently added data for this column. Must be >= 1.
      max_chunk_length: Chunk length for this column. `None` selects an auto
        tuned chunk length; an integer selects a constant chunk length and
        must satisfy 1 <= max_chunk_length <= num_keep_alive_refs.

    Raises:
      ValueError: If `num_keep_alive_refs` is < 1.
      ValueError: If `max_chunk_length` is not None and is either < 1 or
        greater than `num_keep_alive_refs`.
    """
    # Validate the buffer size first; the chunk-length bound depends on it.
    if num_keep_alive_refs < 1:
        raise ValueError(
            f'num_keep_alive_refs ({num_keep_alive_refs}) must be a positive '
            f'integer')

    if max_chunk_length is not None:
        if max_chunk_length < 1 or max_chunk_length > num_keep_alive_refs:
            raise ValueError(
                f'max_chunk_length ({max_chunk_length}) must be None or a positive '
                f'integer <= num_keep_alive_refs ({num_keep_alive_refs})')

    # An explicit length means constant chunking; None means auto tuning.
    if max_chunk_length is not None:
        options = pybind.ConstantChunkerOptions(
            max_chunk_length=max_chunk_length,
            num_keep_alive_refs=num_keep_alive_refs)
    else:
        options = pybind.AutoTunedChunkerOptions(
            num_keep_alive_refs=num_keep_alive_refs, throughput_weight=1.0)

    column_indices = self._path_to_column_index
    if path not in column_indices:
        # Column has no index yet: remember the options instead of applying.
        self._path_to_column_config[path] = options
        return

    self._writer.ConfigureChunker(column_indices[path], options)