Ejemplo n.º 1
0
 def test_configure_seen_column(self):
   """Configuring an already appended column reaches the C++ writer at once."""
   # Append first so that column 'x' is already known when it is configured.
   self.writer.append({'x': 3, 'y': 2})
   self.writer.configure(('x',), num_keep_alive_refs=2, max_chunk_length=1)

   # A fixed `max_chunk_length` is expected to yield constant chunker options,
   # addressed to column index 0 (presumably the index assigned to 'x').
   expected_options = pybind.ConstantChunkerOptions(
       num_keep_alive_refs=2, max_chunk_length=1)
   self.cpp_writer_mock.ConfigureChunker.assert_called_with(
       0, expected_options)
Ejemplo n.º 2
0
  def test_configure_unseen_column(self):
    """Configuring a column before it is appended is deferred until it is seen."""
    self.writer.append({'x': 3, 'y': 2})
    self.writer.configure(('z',), num_keep_alive_refs=2, max_chunk_length=1)

    # 'z' has never been appended, so nothing may have been forwarded to the
    # C++ writer yet.
    configure_chunker = self.cpp_writer_mock.ConfigureChunker
    configure_chunker.assert_not_called()

    # Appending some other column must not trigger the pending configuration.
    self.writer.append({'a': 4})
    configure_chunker.assert_not_called()

    # Once 'z' shows up the stored options must be forwarded. Index 3 is
    # presumably the position of 'z' after 'x', 'y' and 'a'.
    self.writer.append({'z': 5})
    expected_options = pybind.ConstantChunkerOptions(
        num_keep_alive_refs=2, max_chunk_length=1)
    configure_chunker.assert_called_with(3, expected_options)
Ejemplo n.º 3
0
    def configure(self, path: Tuple[Union[int, str], ...], *,
                  num_keep_alive_refs: int, max_chunk_length: Optional[int]):
        """Set the chunking options used for one specific column.

        When `path` is already registered in `self._path_to_column_index`, the
        options are passed straight to the underlying writer through
        `ConfigureChunker`. Otherwise they are stored in
        `self._path_to_column_config` under `path` (presumably to be applied
        once the column is first observed; that step is outside this method).

        Args:
          path: Structured path to the column to configure.
          num_keep_alive_refs: Override value for `num_keep_alive_refs`, i.e.
            the size of the circular buffer of the most recently added data.
          max_chunk_length: Override value for the chunk length used by this
            column. `None` selects an auto tuned chunk length; a number selects
            a constant chunk length.

        Raises:
          ValueError: If num_keep_alive_refs is < 1.
          ValueError: If max_chunk_length is set to a value < 1 or to a value
            greater than num_keep_alive_refs.
        """
        if num_keep_alive_refs < 1:
            raise ValueError(
                f'num_keep_alive_refs ({num_keep_alive_refs}) must be a positive '
                f'integer')

        if max_chunk_length is not None:
            # A fixed chunk length has to fit inside the keep-alive buffer.
            if max_chunk_length < 1 or max_chunk_length > num_keep_alive_refs:
                raise ValueError(
                    f'max_chunk_length ({max_chunk_length}) must be None or a positive '
                    f'integer <= num_keep_alive_refs ({num_keep_alive_refs})')
            options = pybind.ConstantChunkerOptions(
                max_chunk_length=max_chunk_length,
                num_keep_alive_refs=num_keep_alive_refs)
        else:
            options = pybind.AutoTunedChunkerOptions(
                num_keep_alive_refs=num_keep_alive_refs, throughput_weight=1.0)

        if path not in self._path_to_column_index:
            # Column not registered yet: keep the options around for later.
            self._path_to_column_config[path] = options
            return

        column_index = self._path_to_column_index[path]
        self._writer.ConfigureChunker(column_index, options)