Example 1: flush(), which writes the buffered data to S3 as parts of a multipart upload
 def flush(self, force: bool = False) -> None:
     """Write buffered data to S3."""
     if self.closed:  # pylint: disable=using-constant-test
         raise RuntimeError("I/O operation on closed file.")
     if self.writable() and self._buffer.closed is False:
         total_size: int = self._buffer.tell()
         if total_size < _MIN_WRITE_BLOCK and force is False:
             return None
         if total_size == 0:
             return None
         _logger.debug("Flushing: %s bytes", total_size)
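         # Create the multipart upload lazily on the first flush, retrying transient S3 errors.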
         self._mpu = self._mpu or _utils.try_it(
             f=self._client.create_multipart_upload,
             ex=_S3_RETRYABLE_ERRORS,
             base=0.5,
             max_num_tries=6,
             Bucket=self._bucket,
             Key=self._key,
             **get_botocore_valid_kwargs(
                 function_name="create_multipart_upload",
                 s3_additional_kwargs=self._s3_additional_kwargs),
         )
         self._buffer.seek(0)
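         # Upload the buffer contents as evenly sized parts.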
         for chunk_size in _utils.get_even_chunks_sizes(
                 total_size=total_size,
                 chunk_size=_MIN_WRITE_BLOCK,
                 upper_bound=False):
             _logger.debug("chunk_size: %s bytes", chunk_size)
             self._parts_count += 1
             self._upload_proxy.upload(
                 bucket=self._bucket,
                 key=self._key,
                 part=self._parts_count,
                 upload_id=self._mpu["UploadId"],
                 data=self._buffer.read(chunk_size),
                 boto3_session=self._boto3_session,
                 boto3_kwargs=get_botocore_valid_kwargs(
                     function_name="upload_part",
                     s3_additional_kwargs=self._s3_additional_kwargs),
             )
         self._buffer.seek(0)
         self._buffer.truncate(0)
         self._buffer.close()
         self._buffer = io.BytesIO()
     return None
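The call to create_multipart_upload above is wrapped in _utils.try_it, which retries on the exceptions listed in _S3_RETRYABLE_ERRORS. Its internals are not shown on this page; the following is only a sketch of a helper with the same call shape (f, ex, base, max_num_tries, plus keyword arguments forwarded to f), assuming an exponential backoff schedule, which is a guess rather than the library's actual behaviour:

    import random
    import time
    from typing import Any, Callable, Tuple, Type


    def try_it(f: Callable[..., Any], ex: Tuple[Type[Exception], ...],
               base: float, max_num_tries: int, **kwargs: Any) -> Any:
        """Call f(**kwargs) and retry on the exceptions in ex, backing off between tries."""
        for attempt in range(max_num_tries):
            try:
                return f(**kwargs)
            except ex:
                if attempt == max_num_tries - 1:
                    raise  # Out of tries: let the last error propagate.
                # Exponential backoff with a little jitter; the exact schedule used by
                # awswrangler's _utils.try_it is an assumption here.
                time.sleep(base * (2 ** attempt) * (1.0 + random.random() * 0.1))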
Example 2: _fetch_range_proxy(), which reads a byte range from S3 and splits the read across threads when it is large enough
 def _fetch_range_proxy(self, start: int, end: int) -> bytes:
     _logger.debug("Fetching: s3://%s/%s - Range: %s-%s", self._bucket, self._key, start, end)
     s3_client: boto3.client = _utils.client(service_name="s3", session=self._boto3_session)
     boto3_kwargs: Dict[str, Any] = get_botocore_valid_kwargs(
         function_name="get_object", s3_additional_kwargs=self._s3_additional_kwargs
     )
     cpus: int = _utils.ensure_cpu_count(use_threads=self._use_threads)
     range_size: int = end - start
     if cpus < 2 or range_size < (2 * _MIN_PARALLEL_READ_BLOCK):
         return _fetch_range(
             range_values=(start, end),
             bucket=self._bucket,
             key=self._key,
             s3_client=s3_client,
             boto3_kwargs=boto3_kwargs,
             version_id=self._version_id,
         )[1]
     sizes: Tuple[int, ...] = _utils.get_even_chunks_sizes(
         total_size=range_size, chunk_size=_MIN_PARALLEL_READ_BLOCK, upper_bound=False
     )
     ranges: List[Tuple[int, int]] = []
     chunk_start: int = start
     for size in sizes:
         ranges.append((chunk_start, chunk_start + size))
         chunk_start += size
     with concurrent.futures.ThreadPoolExecutor(max_workers=cpus) as executor:
         return self._merge_range(
             ranges=list(
                 executor.map(
                     _fetch_range,
                     ranges,
                     itertools.repeat(self._bucket),
                     itertools.repeat(self._key),
                     itertools.repeat(s3_client),
                     itertools.repeat(boto3_kwargs),
                     itertools.repeat(self._version_id),
                 )
             ),
         )
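The splitting logic here is independent of S3: get_even_chunks_sizes produces the chunk sizes, the loop turns them into consecutive [start, end) ranges, and a thread pool fetches them. ThreadPoolExecutor.map yields results in input order, so the merged bytes come back in the right sequence. Below is a self-contained sketch of the same pattern; the helper names split_into_ranges and fetch_parallel are made up for illustration and do not exist in the library:

    import concurrent.futures
    from typing import Callable, List, Tuple


    def split_into_ranges(start: int, sizes: Tuple[int, ...]) -> List[Tuple[int, int]]:
        """Turn consecutive chunk sizes into [start, end) byte ranges, as in the loop above."""
        ranges: List[Tuple[int, int]] = []
        chunk_start = start
        for size in sizes:
            ranges.append((chunk_start, chunk_start + size))
            chunk_start += size
        return ranges


    def fetch_parallel(fetch_one: Callable[[Tuple[int, int]], bytes],
                       ranges: List[Tuple[int, int]], max_workers: int) -> bytes:
        """Fetch every range on a thread pool; map() keeps input order, so a plain join works."""
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            return b"".join(executor.map(fetch_one, ranges))


    # Tiny usage example against an in-memory "object" instead of S3.
    blob = b"0123456789"
    parts = split_into_ranges(0, (4, 4, 2))
    assert fetch_parallel(lambda r: blob[r[0]:r[1]], parts, max_workers=2) == blob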
Example 3: a unit test that calls get_even_chunks_sizes() directly
def test_get_even_chunks_sizes(total_size, chunk_size, upper_bound, result):
    assert get_even_chunks_sizes(total_size, chunk_size, upper_bound) == result
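The test takes total_size, chunk_size, upper_bound and result from a parametrization that is not part of this excerpt (presumably a pytest.mark.parametrize decorator, along with the import of get_even_chunks_sizes). Whatever the exact expected tuples are, the call sites in Examples 1 and 2 read or upload exactly one chunk per returned size, so the sizes need to be positive and sum back to total_size. A hypothetical property-style check built on that observation follows; the import path awswrangler._utils and the sample inputs are assumptions, not taken from this page:

    import pytest

    from awswrangler._utils import get_even_chunks_sizes  # assumed import path

    _FIVE_MIB = 5 * 1024 * 1024


    @pytest.mark.parametrize("total_size", [1, _FIVE_MIB, 42 * _FIVE_MIB + 7])
    def test_chunk_sizes_cover_total(total_size):
        # Callers consume the sizes one chunk at a time, so they should be
        # positive and add up to exactly the original total.
        sizes = get_even_chunks_sizes(
            total_size=total_size, chunk_size=_FIVE_MIB, upper_bound=False
        )
        assert all(size > 0 for size in sizes)
        assert sum(sizes) == total_size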