def get_component_count(file_size, target_component_size, max_components):
    """Returns the # components a file would be split into for a composite upload.

  Args:
    file_size (int|None): Total byte size of file being divided into components.
      None if the size could not be determined.
    target_component_size (int|str): Target size for each component if the total
      number of components isn't capped by max_components. May be a byte count
      int or a size string (e.g. "50M").
    max_components (int|None): Limit on allowed components regardless of
      file_size and target_component_size. None indicates no limit.

  Returns:
    int: Number of components to split file into for composite upload.
  """
    if file_size is None:
        return 1
    if isinstance(target_component_size, int):
        target_component_size_bytes = target_component_size
    else:
        target_component_size_bytes = scaled_integer.ParseInteger(
            target_component_size)

    return min(math.ceil(file_size / target_component_size_bytes),
               max_components if max_components is not None else float('inf'))
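A quick usage sketch of the helper above. The sizes and cap below are made-up illustrative values, and the call assumes get_component_count and scaled_integer are importable as shown:

# Hypothetical inputs for illustration only.
file_size = 1024 * 1024 * 1024       # 1 GiB file
target_component_size = '50M'        # size string handled by ParseInteger
max_components = 32                  # cap on composite components

# Result is ceil(file_size / parsed target size), capped at max_components.
print(get_component_count(file_size, target_component_size, max_components))

# An unknown file size falls back to a single component.
print(get_component_count(None, target_component_size, max_components))  # 1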
Code Example #2
    def run(self):
        if self._serialization_data is not None:
            apitools_upload = transfer.Upload.FromData(
                self._source_stream,
                json.dumps(self._serialization_data),
                self._gcs_api.client.http,
                auto_transfer=False,
                gzip_encoded=self._request_config.gzip_encoded)
        else:
            apitools_upload = transfer.Upload(
                self._source_stream,
                self._content_type,
                auto_transfer=False,
                chunksize=scaled_integer.ParseInteger(
                    properties.VALUES.storage.upload_chunk_size.Get()),
                gzip_encoded=self._request_config.gzip_encoded,
                total_size=self._request_config.size)
            apitools_upload.strategy = transfer.RESUMABLE_UPLOAD
        apitools_upload.bytes_http = self._http_client

        if not apitools_upload.initialized:
            self._gcs_api.client.objects.Insert(
                self._get_validated_insert_request(), upload=apitools_upload)

        if self._tracker_callback is not None:
            self._tracker_callback(apitools_upload.serialization_data)

        if self._request_config.gzip_encoded:
            http_response = apitools_upload.StreamInChunks()
        else:
            http_response = apitools_upload.StreamMedia()

        return self._gcs_api.client.objects.ProcessHttpResponse(
            self._gcs_api.client.objects.GetMethodConfig('Insert'),
            http_response)
Code Example #3
    def __init__(self,
                 source_resource,
                 destination_resource,
                 delete_source=False,
                 print_created_message=False,
                 user_request_args=None):
        """Initializes task.

    Args:
      source_resource (resource_reference.FileObjectResource): Must contain
        local filesystem path to upload object. Does not need to contain
        metadata.
      destination_resource (resource_reference.ObjectResource|UnknownResource):
        Must contain the full object path. Directories will not be accepted.
        Existing objects at this location will be overwritten.
      delete_source (bool): If copy completes successfully, delete the source
        object afterwards.
      print_created_message (bool): Print a message containing the versioned
        URL of the copy result.
      user_request_args (UserRequestArgs|None): Values for RequestConfig.
    """
        super(FileUploadTask,
              self).__init__(source_resource,
                             destination_resource,
                             user_request_args=user_request_args)
        self._delete_source = delete_source
        self._print_created_message = print_created_message

        self.parallel_processing_key = (
            self._destination_resource.storage_url.url_string)

        self._composite_upload_threshold = scaled_integer.ParseInteger(
            properties.VALUES.storage.parallel_composite_upload_threshold.Get(
            ))
Code Example #4
def _get_component_count(file_size, api_max_component_count):
  """Returns the number of components to use for an upload."""
  preferred_component_size = scaled_integer.ParseInteger(
      properties.VALUES.storage.parallel_composite_upload_component_size.Get())
  component_count = math.ceil(file_size / preferred_component_size)

  if component_count < 2:
    return 2
  if component_count > api_max_component_count:
    return api_max_component_count
  return component_count
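The clamping above can be checked with a self-contained sketch; the 50 MiB preferred size and 32-component cap are hypothetical stand-ins for the property value and the API limit:

import math

def sketch_component_count(file_size, preferred_component_size, api_max_component_count):
    # Mirrors _get_component_count: at least 2 components, at most the API cap.
    component_count = math.ceil(file_size / preferred_component_size)
    return min(max(component_count, 2), api_max_component_count)

print(sketch_component_count(320 * 1024**2, 50 * 1024**2, 32))  # ceil(6.4) -> 7
print(sketch_component_count(1024, 50 * 1024**2, 32))           # raised to the 2-component minimum
print(sketch_component_count(10 * 1024**4, 50 * 1024**2, 32))   # capped at 32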
Code Example #5
def _should_perform_sliced_download(source_resource, destination_resource):
    """Returns True if conditions are right for a sliced download."""
    if destination_resource.storage_url.is_pipe:
        # Can't write to different indices of pipe.
        return False
    if (not source_resource.crc32c_hash
            and properties.VALUES.storage.check_hashes.Get() !=
            properties.CheckHashes.NEVER.value):
        # Do not perform sliced download if hash validation is not possible.
        return False

    threshold = scaled_integer.ParseInteger(
        properties.VALUES.storage.sliced_object_download_threshold.Get())
    component_size = scaled_integer.ParseInteger(
        properties.VALUES.storage.sliced_object_download_component_size.Get())
    # TODO(b/183017513): Only perform sliced downloads with parallelism.
    api_capabilities = api_factory.get_capabilities(
        source_resource.storage_url.scheme)
    return (source_resource.size and threshold != 0
            and source_resource.size > threshold and component_size
            and cloud_api.Capability.SLICED_DOWNLOAD in api_capabilities
            and task_util.should_use_parallelism())
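A simplified restatement of the decision above as a standalone predicate; the parameter names are hypothetical and replace the property and API-capability lookups in the real function:

def sketch_should_slice(is_pipe, has_crc32c, skip_hash_check, size, threshold,
                        component_size, api_supports_slicing, parallel_enabled):
    # Pipes cannot be written to at arbitrary offsets.
    if is_pipe:
        return False
    # Without a CRC32C hash, slicing is only safe when hash checks are disabled.
    if not has_crc32c and not skip_hash_check:
        return False
    # A zero threshold disables sliced downloads entirely.
    return bool(size and threshold and size > threshold and component_size
                and api_supports_slicing and parallel_enabled)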
Code Example #6
    def __init__(self):
        """Initializes response handler for requests downloads."""
        super(_StorageStreamResponseHandler, self).__init__(use_stream=True)
        self._stream = None
        self._digesters = {}
        self._processed_bytes = 0
        self._progress_callback = None

        self._chunk_size = scaled_integer.ParseInteger(
            properties.VALUES.storage.download_chunk_size.Get())
        # If the progress callback is called more frequently than every 512 KB,
        # it can degrade performance.
        self._progress_callback_threshold = max(
            MINIMUM_PROGRESS_CALLBACK_THRESHOLD, self._chunk_size)
Code Example #7
File: concepts.py Project: txl302/RA-project
 def Convert(self, string):
   if not string:
     return None
   try:
     value = scaled_integer.ParseInteger(
         string, default_unit=self._default_unit, type_abbr=self._type_abbr)
     if self._output_unit_value:
       value //= self._output_unit_value
     return value
   except ValueError as e:
     raise exceptions.ParseError(
         self.GetPresentationName(),
         'Failed to parse binary/decimal scaled integer [{}]: {}.'.format(
             string, _SubException(e)))
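A hedged illustration of the output-unit division performed by Convert above; the numbers are arbitrary and only demonstrate the floor-division step:

# Suppose ParseInteger returned 512 MiB worth of bytes and the converter was
# configured with an output unit of 1 MiB (self._output_unit_value).
parsed_bytes = 512 * 1024 * 1024
output_unit_value = 1024 * 1024
print(parsed_bytes // output_unit_value)  # 512, i.e. the value expressed in MiB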
Code Example #8
    def download_object(self,
                        cloud_resource,
                        download_stream,
                        compressed_encoding=False,
                        decryption_wrapper=None,
                        digesters=None,
                        download_strategy=cloud_api.DownloadStrategy.ONE_SHOT,
                        progress_callback=None,
                        start_byte=0,
                        end_byte=None):
        """See super class."""
        extra_args = {}
        if cloud_resource.generation:
            extra_args['VersionId'] = cloud_resource.generation

        if download_strategy == cloud_api.DownloadStrategy.RESUMABLE:
            response = self.client.get_object(
                Bucket=cloud_resource.bucket,
                Key=cloud_resource.name,
                Range='bytes={}-'.format(start_byte),
            )
            processed_bytes = start_byte
            for chunk in response['Body'].iter_chunks(
                    scaled_integer.ParseInteger(
                        properties.VALUES.storage.download_chunk_size.Get())):
                download_stream.write(chunk)
                processed_bytes += len(chunk)
                if progress_callback:
                    progress_callback(processed_bytes)
        else:
            # TODO(b/172480278) Conditionally call get_object for smaller object.
            self.client.download_fileobj(cloud_resource.bucket,
                                         cloud_resource.name,
                                         download_stream,
                                         Callback=progress_callback,
                                         ExtraArgs=extra_args)

        # Download callback doesn't give us streaming data, so we have to
        # read the whole downloaded file to update digests.
        if digesters:
            with files.BinaryFileReader(
                    download_stream.name) as completed_download_stream:
                completed_download_stream.seek(0)
                for hash_algorithm in digesters:
                    digesters[
                        hash_algorithm] = hash_util.get_hash_from_file_stream(
                            completed_download_stream, hash_algorithm)

        return self._get_content_encoding(cloud_resource)
Code Example #9
    def _GetChunkSize(self):
        """Returns the property defined chunksize corrected for server granularity.

    Chunk size for GCS must be a multiple of 256 KiB. This function rounds up
    the property defined chunk size to the nearest chunk size interval.
    """
        gcs_chunk_granularity = 256 * 1024  # 256 KiB
        chunksize = scaled_integer.ParseInteger(
            properties.VALUES.storage.upload_chunk_size.Get())
        if chunksize == 0:
            chunksize = None  # Use apitools default (1048576 B)
        elif chunksize % gcs_chunk_granularity != 0:
            chunksize += gcs_chunk_granularity - (chunksize %
                                                  gcs_chunk_granularity)
        return chunksize
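The rounding in _GetChunkSize can be verified with a small standalone sketch; the 300 KiB input is an arbitrary example:

GCS_CHUNK_GRANULARITY = 256 * 1024  # 256 KiB

def sketch_round_up(chunksize):
    # Round a positive chunk size up to the next multiple of 256 KiB,
    # mirroring the arithmetic in _GetChunkSize.
    remainder = chunksize % GCS_CHUNK_GRANULARITY
    if remainder:
        chunksize += GCS_CHUNK_GRANULARITY - remainder
    return chunksize

print(sketch_round_up(300 * 1024))  # 524288 (512 KiB), the next 256 KiB multiple
print(sketch_round_up(512 * 1024))  # 524288, already aligned so unchanged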
Code Example #10
 def _get_upload(self):
     """Returns an apitools upload class used for a new transfer."""
     resource_args = self._request_config.resource_args
     size = getattr(resource_args, 'size', None)
     max_retries = properties.VALUES.storage.max_retries.GetInt()
     apitools_upload = transfer.Upload(
         self._source_stream,
         resource_args.content_type,
         auto_transfer=False,
         chunksize=scaled_integer.ParseInteger(
             properties.VALUES.storage.upload_chunk_size.Get()),
         gzip_encoded=self._should_gzip_in_flight,
         total_size=size,
         num_retries=max_retries)
     apitools_upload.strategy = transfer.RESUMABLE_UPLOAD
     return apitools_upload
Code Example #11
def get_upload_strategy(api, object_length):
    """Determines if resumbale uplaod should be performed.

  Args:
    api (CloudApi): An api instance to check if it supports resumable upload.
    object_length (int): Length of the data to be uploaded.

  Returns:
    UploadStrategy: RESUMABLE if a resumable upload should be performed,
      SIMPLE otherwise.
  """
    resumable_threshold = scaled_integer.ParseInteger(
        properties.VALUES.storage.resumable_threshold.Get())
    if (object_length >= resumable_threshold
            and cloud_api.Capability.RESUMABLE_UPLOAD in api.capabilities):
        return cloud_api.UploadStrategy.RESUMABLE
    else:
        return cloud_api.UploadStrategy.SIMPLE
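A hedged usage sketch of get_upload_strategy: assuming a hypothetical api object whose capabilities include cloud_api.Capability.RESUMABLE_UPLOAD, and a resumable_threshold property of 8 MiB, the strategy follows from the object length:

# Hypothetical illustration; `api` is assumed to expose a `capabilities` set.
large_strategy = get_upload_strategy(api, 100 * 1024 * 1024)  # above threshold
small_strategy = get_upload_strategy(api, 1024)               # below threshold
# large_strategy == cloud_api.UploadStrategy.RESUMABLE
# small_strategy == cloud_api.UploadStrategy.SIMPLE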
Code Example #12
    def __init__(self,
                 source_resource,
                 destination_resource,
                 delete_source=False,
                 do_not_decompress=False,
                 print_created_message=False,
                 user_request_args=None):
        """Initializes task.

    Args:
      source_resource (ObjectResource): Must contain
        the full path of object to download, including bucket. Directories
        will not be accepted. Does not need to contain metadata.
      destination_resource (FileObjectResource|UnknownResource): Must contain
        local filesystem path to destination object. Does not need to contain
        metadata.
      delete_source (bool): If copy completes successfully, delete the source
        object afterwards.
      do_not_decompress (bool): Prevents automatically decompressing
        downloaded gzips.
      print_created_message (bool): Print a message containing the versioned
        URL of the copy result.
      user_request_args (UserRequestArgs|None): Values for RequestConfig.
    """
        super(FileDownloadTask,
              self).__init__(source_resource,
                             destination_resource,
                             user_request_args=user_request_args)
        self._delete_source = delete_source
        self._do_not_decompress = do_not_decompress
        self._print_created_message = print_created_message

        self._temporary_destination_resource = (
            self._get_temporary_destination_resource())

        if (self._source_resource.size
                and self._source_resource.size >= scaled_integer.ParseInteger(
                    properties.VALUES.storage.resumable_threshold.Get())):
            self._strategy = cloud_api.DownloadStrategy.RESUMABLE
        else:
            self._strategy = cloud_api.DownloadStrategy.ONE_SHOT

        self.parallel_processing_key = (
            self._destination_resource.storage_url.url_string)
Code Example #13
  def __init__(self, source_resource, destination_resource):
    """Initializes task.

    Args:
      source_resource (resource_reference.FileObjectResource): Must contain
          local filesystem path to upload object. Does not need to contain
          metadata.
      destination_resource (resource_reference.ObjectResource|UnknownResource):
          Must contain the full object path. Directories will not be accepted.
          Existing objects at this location will be overwritten.
    """
    super(FileUploadTask, self).__init__()
    self._source_resource = source_resource
    self._destination_resource = destination_resource
    self.parallel_processing_key = (
        self._destination_resource.storage_url.url_string)

    self._composite_upload_threshold = scaled_integer.ParseInteger(
        properties.VALUES.storage.parallel_composite_upload_threshold.Get())
Code Example #14
    def _download_object(self, cloud_resource, download_stream, digesters,
                         progress_callback, start_byte):
        get_object_args = {
            'Bucket': cloud_resource.bucket,
            'Key': cloud_resource.name,
            'Range': 'bytes={}-'.format(start_byte),
        }
        if cloud_resource.generation is not None:
            get_object_args['VersionId'] = str(cloud_resource.generation)
        response = self.client.get_object(**get_object_args)
        processed_bytes = start_byte
        for chunk in response['Body'].iter_chunks(
                scaled_integer.ParseInteger(
                    properties.VALUES.storage.download_chunk_size.Get())):
            download_stream.write(chunk)

            for hash_object in digesters.values():
                hash_object.update(chunk)

            processed_bytes += len(chunk)
            if progress_callback:
                progress_callback(processed_bytes)
        return response.get('ContentEncoding')
Code Example #15
    def copy_object(self,
                    source_resource,
                    destination_resource,
                    progress_callback=None,
                    request_config=None):
        """See super class."""
        # TODO(b/161898251): Implement encryption and decryption.
        if not request_config:
            request_config = GcsRequestConfig()

        destination_metadata = getattr(destination_resource, 'metadata', None)
        if not destination_metadata:
            destination_metadata = gcs_metadata_util.get_apitools_metadata_from_url(
                destination_resource.storage_url)
            if source_resource.metadata:
                gcs_metadata_util.copy_select_object_metadata(
                    source_resource.metadata, destination_metadata)

        if request_config.max_bytes_per_call:
            max_bytes_per_call = request_config.max_bytes_per_call
        else:
            max_bytes_per_call = scaled_integer.ParseInteger(
                properties.VALUES.storage.copy_chunk_size.Get())

        if request_config.predefined_acl_string:
            predefined_acl = getattr(
                self.messages.StorageObjectsRewriteRequest.
                DestinationPredefinedAclValueValuesEnum,
                request_config.predefined_acl_string)
        else:
            predefined_acl = None

        if source_resource.generation is None:
            source_generation = None
        else:
            source_generation = int(source_resource.generation)

        tracker_file_path = tracker_file_util.get_tracker_file_path(
            destination_resource.storage_url,
            tracker_file_util.TrackerFileType.REWRITE,
            source_resource.storage_url)
        rewrite_parameters_hash = tracker_file_util.hash_gcs_rewrite_parameters_for_tracker_file(
            source_resource,
            destination_resource,
            destination_metadata,
            request_config=request_config)
        try:
            resume_rewrite_token = tracker_file_util.read_rewrite_tracker_file(
                tracker_file_path, rewrite_parameters_hash)
            log.debug('Found rewrite token. Resuming copy.')
        except files.MissingFileError:
            resume_rewrite_token = None
            log.debug('No rewrite token found. Starting copy from scratch.')

        while True:
            request = self.messages.StorageObjectsRewriteRequest(
                sourceBucket=source_resource.storage_url.bucket_name,
                sourceObject=source_resource.storage_url.object_name,
                destinationBucket=destination_resource.storage_url.bucket_name,
                destinationObject=destination_resource.storage_url.object_name,
                object=destination_metadata,
                sourceGeneration=source_generation,
                ifGenerationMatch=request_config.precondition_generation_match,
                ifMetagenerationMatch=(
                    request_config.precondition_metageneration_match),
                destinationPredefinedAcl=predefined_acl,
                rewriteToken=resume_rewrite_token,
                maxBytesRewrittenPerCall=max_bytes_per_call)
            rewrite_response = self.client.objects.Rewrite(request)
            processed_bytes = rewrite_response.totalBytesRewritten
            if progress_callback:
                progress_callback(processed_bytes)

            if rewrite_response.done:
                break
            elif not resume_rewrite_token:
                resume_rewrite_token = rewrite_response.rewriteToken
                tracker_file_util.write_rewrite_tracker_file(
                    tracker_file_path, rewrite_parameters_hash,
                    rewrite_response.rewriteToken)

        tracker_file_util.delete_tracker_file(tracker_file_path)
        return gcs_metadata_util.get_object_resource_from_metadata(
            rewrite_response.resource)
Code Example #16
    def copy_object(self,
                    source_resource,
                    destination_resource,
                    request_config,
                    progress_callback=None):
        """See super class."""
        destination_metadata = getattr(destination_resource, 'metadata', None)
        if not destination_metadata:
            destination_metadata = gcs_metadata_util.get_apitools_metadata_from_url(
                destination_resource.storage_url)
        if source_resource.metadata:
            gcs_metadata_util.copy_select_object_metadata(
                source_resource.metadata, destination_metadata, request_config)
        gcs_metadata_util.update_object_metadata_from_request_config(
            destination_metadata, request_config)

        if request_config.max_bytes_per_call:
            max_bytes_per_call = request_config.max_bytes_per_call
        else:
            max_bytes_per_call = scaled_integer.ParseInteger(
                properties.VALUES.storage.copy_chunk_size.Get())

        if request_config.predefined_acl_string:
            predefined_acl = getattr(
                self.messages.StorageObjectsRewriteRequest.
                DestinationPredefinedAclValueValuesEnum,
                request_config.predefined_acl_string)
        else:
            predefined_acl = None

        if source_resource.generation is None:
            source_generation = None
        else:
            source_generation = int(source_resource.generation)

        tracker_file_path = tracker_file_util.get_tracker_file_path(
            destination_resource.storage_url,
            tracker_file_util.TrackerFileType.REWRITE,
            source_url=source_resource.storage_url)
        rewrite_parameters_hash = tracker_file_util.hash_gcs_rewrite_parameters_for_tracker_file(
            source_resource,
            destination_resource,
            destination_metadata,
            request_config=request_config)
        try:
            resume_rewrite_token = tracker_file_util.read_rewrite_tracker_file(
                tracker_file_path, rewrite_parameters_hash)
            log.debug('Found rewrite token. Resuming copy.')
        except files.MissingFileError:
            resume_rewrite_token = None
            log.debug('No rewrite token found. Starting copy from scratch.')

        with self._encryption_headers_for_rewrite_call_context(request_config):
            while True:
                request = self.messages.StorageObjectsRewriteRequest(
                    sourceBucket=source_resource.storage_url.bucket_name,
                    sourceObject=source_resource.storage_url.object_name,
                    destinationBucket=destination_resource.storage_url.
                    bucket_name,
                    destinationObject=destination_resource.storage_url.
                    object_name,
                    object=destination_metadata,
                    sourceGeneration=source_generation,
                    ifGenerationMatch=copy_util.get_generation_match_value(
                        request_config),
                    ifMetagenerationMatch=request_config.
                    precondition_metageneration_match,
                    destinationPredefinedAcl=predefined_acl,
                    rewriteToken=resume_rewrite_token,
                    maxBytesRewrittenPerCall=max_bytes_per_call)

                encryption_key = getattr(request_config.resource_args,
                                         'encryption_key', None)
                if encryption_key and encryption_key.type == encryption_util.KeyType.CMEK:
                    # This key is also provided in destination_metadata.kmsKeyName by
                    # update_object_metadata_from_request_config. This has no effect on
                    # the copy object request, which references the field below, and is a
                    # side-effect of logic required for uploads and compose operations.
                    request.destinationKmsKeyName = encryption_key.key

                rewrite_response = self.client.objects.Rewrite(request)
                processed_bytes = rewrite_response.totalBytesRewritten
                if progress_callback:
                    progress_callback(processed_bytes)

                if rewrite_response.done:
                    break

                if not resume_rewrite_token:
                    resume_rewrite_token = rewrite_response.rewriteToken
                    if source_resource.size >= scaled_integer.ParseInteger(
                            properties.VALUES.storage.resumable_threshold.Get(
                            )):
                        tracker_file_util.write_rewrite_tracker_file(
                            tracker_file_path, rewrite_parameters_hash,
                            rewrite_response.rewriteToken)

        tracker_file_util.delete_tracker_file(tracker_file_path)
        return gcs_metadata_util.get_object_resource_from_metadata(
            rewrite_response.resource)