Example #1
    def execute(self, task_status_queue=None):
        log.status.Print('Updating {}...'.format(self._bucket_resource))
        provider = self._bucket_resource.storage_url.scheme
        request_config = request_config_factory.get_request_config(
            self._bucket_resource.storage_url,
            user_request_args=self._user_request_args)

        try:
            api_factory.get_api(provider).patch_bucket(
                self._bucket_resource, request_config=request_config)
        except errors.GcsApiError as e:
            # Service agent does not have the encrypter/decrypter role.
            if (e.payload.status_code == 403
                    and request_config.resource_args.default_encryption_key):

                service_agent = api_factory.get_api(
                    provider).get_service_agent()
                requests.AddCryptoKeyPermission(
                    request_config.resource_args.default_encryption_key,
                    'serviceAccount:' + service_agent)

                api_factory.get_api(provider).patch_bucket(
                    self._bucket_resource, request_config=request_config)
            else:
                raise

        if task_status_queue:
            progress_callbacks.increment_count_callback(task_status_queue)
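
The 403 handler above grants the bucket's service agent the Cloud KMS encrypter/decrypter role and then retries the patch once. A minimal sketch of that retry-after-grant pattern, with hypothetical patch_bucket, grant_kms_role, and ApiError stand-ins for the gcloud-internal surface:

class ApiError(Exception):
    def __init__(self, status_code):
        super().__init__('HTTP {}'.format(status_code))
        self.status_code = status_code

def patch_with_kms_grant(patch_bucket, grant_kms_role, key_name):
    try:
        patch_bucket()
    except ApiError as e:
        # A 403 with a default encryption key set suggests the service agent
        # lacks the KMS role; grant it and retry exactly once.
        if e.status_code == 403 and key_name:
            grant_kms_role(key_name)
            patch_bucket()
        else:
            raise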
    def execute(self, task_status_queue=None):
        """Runs download to stream."""
        progress_callback = progress_callbacks.FilesAndBytesProgressCallback(
            status_queue=task_status_queue,
            offset=0,
            length=self._source_resource.size,
            source_url=self._source_resource.storage_url,
            destination_url=self._download_stream.name,
            operation_name=task_status.OperationName.DOWNLOADING,
            process_id=os.getpid(),
            thread_id=threading.get_ident(),
        )

        request_config = request_config_factory.get_request_config(
            self._source_resource.storage_url,
            decryption_key_hash=self._source_resource.decryption_key_hash,
            user_request_args=self._user_request_args,
        )

        provider = self._source_resource.storage_url.scheme
        api_factory.get_api(provider).download_object(
            self._source_resource,
            self._download_stream,
            request_config,
            download_strategy=cloud_api.DownloadStrategy.ONE_SHOT,
            progress_callback=progress_callback)

        if self._print_created_message:
            log.status.Print('Created: {}'.format(self._download_stream.name))
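
FilesAndBytesProgressCallback pushes byte counts onto the status queue as the download progresses. As a rough, self-contained illustration of the callback shape (the reporting format here is an assumption, not the gcloud implementation):

def make_progress_callback(total_bytes, label):
    def callback(bytes_processed):
        pct = 100 * bytes_processed / total_bytes if total_bytes else 100
        print('{}: {}/{} bytes ({:.0f}%)'.format(
            label, bytes_processed, total_bytes, pct))
    return callback

report = make_progress_callback(1024, 'gs://bucket/obj')
report(512)  # gs://bucket/obj: 512/1024 bytes (50%)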
    def _upload_using_managed_transfer_utility(self, source_stream,
                                               destination_resource,
                                               extra_args):
        """Uploads the data using boto3's managed transfer utility.

        Calls the upload_fileobj method, which performs multi-threaded,
        multipart upload automatically. It performs slightly better than the
        put_object API method. However, upload_fileobj cannot perform data
        integrity checks, so we have to use the put_object method in such
        cases.

        Args:
          source_stream (a file-like object): A file-like object to upload. At
            a minimum, it must implement the read method and must return bytes.
          destination_resource (resource_reference.ObjectResource|UnknownResource):
            Represents the metadata for the destination object.
          extra_args (dict): Extra arguments that may be passed to the client
            operation.

        Returns:
          resource_reference.ObjectResource with uploaded object's metadata.
        """
        bucket_name = destination_resource.storage_url.bucket_name
        object_name = destination_resource.storage_url.object_name
        self.client.upload_fileobj(Fileobj=source_stream,
                                   Bucket=bucket_name,
                                   Key=object_name,
                                   ExtraArgs=extra_args)
        return self.get_object_metadata(
            bucket_name, object_name,
            request_config_factory.get_request_config(
                storage_url.CloudUrl(scheme=storage_url.ProviderPrefix.S3)))
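
Outside of gcloud, the same managed transfer utility can be exercised with a few lines of boto3; the bucket and key below are placeholders, and valid AWS credentials are assumed:

import io

import boto3

client = boto3.client('s3')
client.upload_fileobj(
    Fileobj=io.BytesIO(b'hello world'),
    Bucket='my-bucket',
    Key='path/to/object',
    ExtraArgs={'ContentType': 'text/plain'})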
    def execute(self, task_status_queue=None):
        """Performs a simple upload. See base class for information on args."""
        api = api_factory.get_api(
            self._destination_resource.storage_url.scheme)
        request_config = request_config_factory.get_request_config(
            self._destination_resource.storage_url,
            content_type=upload_util.get_content_type(
                self._source_resource.storage_url.object_name,
                self._source_resource.storage_url.is_pipe),
            md5_hash=self._source_resource.md5_hash,
            size=self._length)

        digesters = upload_util.get_digesters(self._source_resource,
                                              self._destination_resource)
        source_stream = upload_util.get_stream(
            self._source_resource,
            length=self._length,
            digesters=digesters,
            task_status_queue=task_status_queue,
            destination_resource=self._destination_resource)

        with source_stream:
            uploaded_object_resource = api.upload_object(
                source_stream,
                self._destination_resource,
                request_config,
                source_resource=self._source_resource,
                upload_strategy=cloud_api.UploadStrategy.SIMPLE)

        upload_util.validate_uploaded_object(digesters,
                                             uploaded_object_resource,
                                             task_status_queue)
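
The digesters computed before the upload let validate_uploaded_object compare a locally computed hash against the hash the server reports. A sketch of that idea, using an illustrative wrapper class rather than the gcloud implementation:

import base64
import hashlib
import io

class DigestingStream:
    """Wraps a readable stream and feeds every chunk into a hash object."""

    def __init__(self, stream, digest):
        self._stream = stream
        self._digest = digest

    def read(self, size=-1):
        chunk = self._stream.read(size)
        self._digest.update(chunk)
        return chunk

md5 = hashlib.md5()
stream = DigestingStream(io.BytesIO(b'payload'), md5)
while stream.read(4):
    pass
print(base64.b64encode(md5.digest()).decode())  # Hash of the streamed bytes.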
Example #5
    def execute(self, task_status_queue=None):
        log.status.Print('Creating {}...'.format(self._bucket_resource))
        provider = self._bucket_resource.storage_url.scheme
        request_config = request_config_factory.get_request_config(
            self._bucket_resource.storage_url,
            user_request_args=self._user_request_args)
        api_factory.get_api(provider).create_bucket(
            self._bucket_resource, request_config=request_config)
    def execute(self, task_status_queue=None):
        provider = self._object_url.scheme
        request_config = request_config_factory.get_request_config(
            self._object_url, user_request_args=self._user_request_args)

        if self._verbose:
            log.status.Print('Removing {}...'.format(self._object_url))
        api_factory.get_api(provider).delete_object(self._object_url,
                                                    request_config)
        if task_status_queue:
            progress_callbacks.increment_count_callback(task_status_queue)
    def execute(self, task_status_queue=None):
        api_client = api_factory.get_api(
            self._source_resource.storage_url.scheme)
        if copy_util.check_for_cloud_clobber(self._user_request_args,
                                             api_client,
                                             self._destination_resource):
            log.status.Print(
                copy_util.get_no_clobber_message(
                    self._destination_resource.storage_url))
            if self._send_manifest_messages:
                manifest_util.send_skip_message(
                    task_status_queue, self._source_resource,
                    self._destination_resource,
                    copy_util.get_no_clobber_message(
                        self._destination_resource.storage_url))
            return

        progress_callback = progress_callbacks.FilesAndBytesProgressCallback(
            status_queue=task_status_queue,
            offset=0,
            length=self._source_resource.size,
            source_url=self._source_resource.storage_url,
            destination_url=self._destination_resource.storage_url,
            operation_name=task_status.OperationName.INTRA_CLOUD_COPYING,
            process_id=os.getpid(),
            thread_id=threading.get_ident(),
        )

        request_config = request_config_factory.get_request_config(
            self._destination_resource.storage_url,
            decryption_key_hash=self._source_resource.decryption_key_hash,
            user_request_args=self._user_request_args)
        # TODO(b/161900052): Support all of copy_object's parameters
        result_resource = api_client.copy_object(
            self._source_resource,
            self._destination_resource,
            request_config,
            progress_callback=progress_callback)

        if self._print_created_message:
            log.status.Print('Created: {}'.format(result_resource.storage_url))
        if self._send_manifest_messages:
            manifest_util.send_success_message(
                task_status_queue,
                self._source_resource,
                self._destination_resource,
                md5_hash=result_resource.md5_hash)
        if self._delete_source:
            return task.Output(
                additional_task_iterators=[[
                    delete_object_task.DeleteObjectTask(
                        self._source_resource.storage_url)
                ]],
                messages=None)
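
The delete-source branch turns the copy into a move by chaining a follow-up DeleteObjectTask. Reduced to its essence (copy_object and delete_object are hypothetical callables):

def move_object(copy_object, delete_object, source, destination):
    copy_object(source, destination)
    # Delete the source only after the copy has succeeded.
    delete_object(source)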
    def _decrypt_resource_if_necessary(self, resource):
        should_decrypt_resource = (
            cloud_api.Capability.ENCRYPTION in self._client.capabilities
            and self._fields_scope != cloud_api.FieldsScope.SHORT
            and isinstance(resource, resource_reference.ObjectResource)
            and resource.decryption_key_hash)

        if not should_decrypt_resource:
            return resource

        request_config = request_config_factory.get_request_config(
            resource.storage_url,
            decryption_key_hash=resource.decryption_key_hash,
            error_on_missing_key=self._error_on_missing_key)
        return self._client.get_object_metadata(resource.bucket, resource.name,
                                                request_config)
    def execute(self, task_status_queue=None):
        del task_status_queue  # Unused.
        request_config = request_config_factory.get_request_config(
            self._destination_resource.storage_url,
            user_request_args=self._user_request_args)

        provider = self._destination_resource.storage_url.scheme
        created_resource = api_factory.get_api(provider).compose_objects(
            self._source_resources,
            self._destination_resource,
            request_config,
            original_source_resource=self._original_source_resource)
        return task.Output(
            messages=[
                task.Message(topic=task.Topic.CREATED_RESOURCE,
                             payload=created_resource),
            ],
            additional_task_iterators=[])
    def execute(self, task_status_queue=None):
        log.status.Print('Removing {}...'.format(self._url))
        api_client = api_factory.get_api(self._url.scheme)
        request_config = request_config_factory.get_request_config(self._url)
        try:
            api_client.delete_bucket(self._url.bucket_name, request_config)
            if task_status_queue:
                progress_callbacks.increment_count_callback(task_status_queue)
        # pylint:disable=broad-except
        except Exception as e:
            # pylint:enable=broad-except
            if 'not empty' in str(e):
                raise type(e)(
                    'Bucket is not empty. To delete all objects and then'
                    ' delete the bucket, use: gcloud storage rm -r')
            else:
                raise
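
The except block above re-raises the same exception type with a more actionable message. A minimal, standalone demonstration of that pattern (assuming the exception type accepts a single message argument):

def friendly_delete(delete_bucket, bucket_name):
    try:
        delete_bucket(bucket_name)
    except Exception as e:
        if 'not empty' in str(e):
            # Keep the original type so callers' except clauses still match.
            raise type(e)(
                'Bucket is not empty. To delete all objects and then delete'
                ' the bucket, use: gcloud storage rm -r') from e
        raise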
Example #11
    def execute(self, task_status_queue=None):
        """Performs download."""
        digesters = _get_digesters(self._component_number,
                                   self._source_resource)

        progress_callback = progress_callbacks.FilesAndBytesProgressCallback(
            status_queue=task_status_queue,
            offset=self._offset,
            length=self._length,
            source_url=self._source_resource.storage_url,
            destination_url=self._destination_resource.storage_url,
            component_number=self._component_number,
            total_components=self._total_components,
            operation_name=task_status.OperationName.DOWNLOADING,
            process_id=os.getpid(),
            thread_id=threading.get_ident(),
        )

        request_config = request_config_factory.get_request_config(
            self._source_resource.storage_url,
            decryption_key_hash=self._source_resource.decryption_key_hash,
            user_request_args=self._user_request_args,
        )

        if self._source_resource.size and self._component_number is not None:
            try:
                api_download_result = self._perform_component_download(
                    request_config, progress_callback, digesters)
            # pylint:disable=broad-except
            except Exception as e:
                # pylint:enable=broad-except
                return task.Output(
                    additional_task_iterators=None,
                    messages=[task.Message(topic=task.Topic.ERROR, payload=e)])

        elif self._strategy is cloud_api.DownloadStrategy.RESUMABLE:
            api_download_result = self._perform_resumable_download(
                request_config, progress_callback, digesters)
        else:
            api_download_result = self._perform_one_shot_download(
                request_config, progress_callback, digesters)
        return self._get_output(digesters, api_download_result)
    def _fetch_objects(self, bucket_name):
        """Fetch all objects for the given bucket that match the URL."""
        needs_further_expansion = (contains_wildcard(self._url.object_name)
                                   or self._all_versions)
        if not needs_further_expansion:
            try:
                # Assume that the url represents a single object.
                resource = self._client.get_object_metadata(
                    bucket_name,
                    self._url.object_name,
                    # TODO(b/197754758): add user request args from surface.
                    request_config_factory.get_request_config(self._url),
                    self._url.generation,
                    self._fields_scope)

                return [self._decrypt_resource_if_necessary(resource)]
            except api_errors.NotFoundError:
                # Object does not exist. Could be a prefix.
                pass
        return self._expand_object_path(bucket_name)
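
_fetch_objects optimistically treats the URL as a single object and falls back to prefix expansion on a NotFoundError. The same flow in a self-contained sketch (get_object and list_objects are hypothetical):

def fetch_objects(get_object, list_objects, object_name, has_wildcard):
    if not has_wildcard:
        try:
            # Assume the URL names a single object.
            return [get_object(object_name)]
        except LookupError:  # Stand-in for the API's NotFoundError.
            pass  # Not a single object; it may be a prefix.
    return list_objects(prefix=object_name)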
Example #13
    def _run_download(self, start_byte):
        """Performs the download operation."""
        request_config = request_config_factory.get_request_config(
            self._source_resource.storage_url,
            user_request_args=self._user_request_args)

        client = api_factory.get_api(self._source_resource.storage_url.scheme)
        try:
            if self._source_resource.size != 0:
                client.download_object(
                    self._source_resource,
                    self.writable_stream,
                    request_config,
                    start_byte=start_byte,
                    download_strategy=cloud_api.DownloadStrategy.ONE_SHOT)
        except _AbruptShutdownError:
            # Shutdown caused by interruption from another thread.
            pass
        except Exception as e:  # pylint: disable=broad-except
            # The stack trace of an exception raised in the thread is not
            # visible in the caller thread. Hence we catch any exception so
            # that we can re-raise it from the parent thread.
            self.shutdown(e)
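
As the comment in _run_download notes, an exception raised in a worker thread is invisible to the caller. A self-contained sketch of the store-and-re-raise pattern it relies on:

import threading

class Worker:
    def __init__(self, target):
        self._target = target
        self.error = None
        self._thread = threading.Thread(target=self._run)

    def _run(self):
        try:
            self._target()
        except Exception as e:  # Captured for the parent thread.
            self.error = e

    def start(self):
        self._thread.start()

    def join(self):
        self._thread.join()
        if self.error:
            raise self.error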
    def execute(self, task_status_queue=None):
        """Performs upload."""
        digesters = self._get_digesters()
        destination_url = self._destination_resource.storage_url
        provider = destination_url.scheme
        api = api_factory.get_api(provider)
        request_config = request_config_factory.get_request_config(
            destination_url,
            content_type=upload_util.get_content_type(
                self._source_path, self._source_resource.storage_url.is_pipe),
            md5_hash=self._source_resource.md5_hash,
            size=self._length,
            user_request_args=self._user_request_args)

        if self._component_number is None:
            source_resource_for_metadata = self._source_resource
        else:
            source_resource_for_metadata = None

        with self._get_upload_stream(digesters,
                                     task_status_queue) as source_stream:
            upload_strategy = upload_util.get_upload_strategy(
                api, self._length)
            if upload_strategy == cloud_api.UploadStrategy.RESUMABLE:
                tracker_file_path = tracker_file_util.get_tracker_file_path(
                    self._destination_resource.storage_url,
                    tracker_file_util.TrackerFileType.UPLOAD,
                    component_number=self._component_number)

                encryption_key = encryption_util.get_encryption_key()
                if encryption_key:
                    encryption_key_hash = encryption_key.sha256
                else:
                    encryption_key_hash = None

                complete = False
                tracker_callback = functools.partial(
                    tracker_file_util.write_resumable_upload_tracker_file,
                    tracker_file_path, complete, encryption_key_hash)

                tracker_data = tracker_file_util.read_resumable_upload_tracker_file(
                    tracker_file_path)

                if (tracker_data is None or tracker_data.encryption_key_sha256
                        != encryption_key_hash):
                    serialization_data = None
                else:
                    # TODO(b/190093425): Print a better message for component uploads once
                    # the final destination resource is available in ComponentUploadTask.
                    log.status.Print('Resuming upload for ' +
                                     destination_url.object_name)

                    serialization_data = tracker_data.serialization_data

                    if tracker_data.complete:
                        try:
                            metadata_request_config = request_config_factory.get_request_config(
                                destination_url,
                                decryption_key_hash=encryption_key_hash)
                            # Providing a decryption key means the response will include the
                            # object's hash if the keys match, and raise an error if they do
                            # not. This is desirable since we want to re-upload objects with
                            # the wrong key, and need the object's hash for validation.
                            destination_resource = api.get_object_metadata(
                                destination_url.bucket_name,
                                destination_url.object_name,
                                metadata_request_config)
                        except api_errors.CloudApiError:
                            # Any problem fetching existing object metadata can be ignored,
                            # since we'll just re-upload the object.
                            pass
                        else:
                            # The API call will not error if we provide an encryption key but
                            # the destination is unencrypted, hence the additional (defensive)
                            # check below.
                            destination_key_hash = destination_resource.decryption_key_hash
                            if (destination_key_hash == encryption_key_hash
                                    and self._existing_destination_is_valid(
                                        destination_resource)):
                                return self._get_output(destination_resource)

                attempt_upload = functools.partial(
                    api.upload_object,
                    source_stream,
                    self._destination_resource,
                    request_config,
                    source_resource=source_resource_for_metadata,
                    serialization_data=serialization_data,
                    tracker_callback=tracker_callback,
                    upload_strategy=upload_strategy)

                def _handle_resumable_upload_error(exc_type, exc_value,
                                                   exc_traceback, state):
                    """Returns true if resumable upload should retry on error argument."""
                    del exc_traceback  # Unused.
                    if not (exc_type is api_errors.NotFoundError
                            or getattr(exc_value, 'status_code', None) == 410):

                        if exc_type is api_errors.ResumableUploadAbortError:
                            tracker_file_util.delete_tracker_file(
                                tracker_file_path)

                        # Otherwise the error is probably a persistent network issue
                        # that is already retried by API clients, so we'll keep the tracker
                        # file to allow the user to retry the upload in a separate run.

                        return False

                    tracker_file_util.delete_tracker_file(tracker_file_path)

                    if state.retrial == 0:
                        # Ping bucket to see if it exists.
                        try:
                            api.get_bucket(
                                self._destination_resource.storage_url
                                .bucket_name)
                        except api_errors.CloudApiError as e:
                            # The user may not have permission to view the bucket metadata,
                            # so the ping may still be valid for access denied errors.
                            status = getattr(e, 'status_code', None)
                            if status not in (401, 403):
                                raise

                    return True

                # Convert seconds to milliseconds by multiplying by 1000.
                destination_resource = retry.Retryer(
                    max_retrials=properties.VALUES.storage.max_retries.GetInt(),
                    wait_ceiling_ms=(
                        properties.VALUES.storage.max_retry_delay.GetInt() *
                        1000),
                    exponential_sleep_multiplier=(
                        properties.VALUES.storage.exponential_sleep_multiplier
                        .GetInt())).RetryOnException(
                            attempt_upload,
                            sleep_ms=(
                                properties.VALUES.storage.base_retry_delay
                                .GetInt() * 1000),
                            should_retry_if=_handle_resumable_upload_error)

                tracker_data = tracker_file_util.read_resumable_upload_tracker_file(
                    tracker_file_path)
                if tracker_data is not None:
                    if self._component_number is not None:
                        tracker_file_util.write_resumable_upload_tracker_file(
                            tracker_file_path,
                            complete=True,
                            encryption_key_sha256=(
                                tracker_data.encryption_key_sha256),
                            serialization_data=tracker_data.serialization_data)
                    else:
                        tracker_file_util.delete_tracker_file(
                            tracker_file_path)
            else:
                destination_resource = api.upload_object(
                    source_stream,
                    self._destination_resource,
                    request_config,
                    source_resource=source_resource_for_metadata,
                    upload_strategy=upload_strategy)

            upload_util.validate_uploaded_object(digesters,
                                                 destination_resource,
                                                 task_status_queue)

        return self._get_output(destination_resource)
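
The resumable path above persists serialization data plus the encryption key hash in a tracker file, and refuses to resume when the key has changed. A sketch of that bookkeeping; the JSON layout here is an assumption, not the gcloud tracker-file format:

import json
import os

def write_tracker(path, serialization_data, key_sha256, complete=False):
    with open(path, 'w') as f:
        json.dump({'serialization_data': serialization_data,
                   'encryption_key_sha256': key_sha256,
                   'complete': complete}, f)

def read_tracker(path, key_sha256):
    if not os.path.exists(path):
        return None
    with open(path) as f:
        data = json.load(f)
    # Resuming with a different key would validate against the wrong object.
    if data['encryption_key_sha256'] != key_sha256:
        return None
    return data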
Example #15
    def execute(self, task_status_queue=None):
        """Copies file by downloading and uploading in parallel."""
        # TODO (b/168712813): Add option to use the Data Transfer component.
        destination_client = api_factory.get_api(
            self._destination_resource.storage_url.scheme)
        if copy_util.check_for_cloud_clobber(self._user_request_args,
                                             destination_client,
                                             self._destination_resource):
            log.status.Print(
                copy_util.get_no_clobber_message(
                    self._destination_resource.storage_url))
            if self._send_manifest_messages:
                manifest_util.send_skip_message(
                    task_status_queue, self._source_resource,
                    self._destination_resource,
                    copy_util.get_no_clobber_message(
                        self._destination_resource.storage_url))
            return

        progress_callback = progress_callbacks.FilesAndBytesProgressCallback(
            status_queue=task_status_queue,
            offset=0,
            length=self._source_resource.size,
            source_url=self._source_resource.storage_url,
            destination_url=self._destination_resource.storage_url,
            operation_name=task_status.OperationName.DAISY_CHAIN_COPYING,
            process_id=os.getpid(),
            thread_id=threading.get_ident(),
        )

        buffer_controller = BufferController(
            self._source_resource,
            self._destination_resource.storage_url.scheme,
            self._user_request_args, progress_callback)

        # Perform download in a separate thread so that upload can be performed
        # simultaneously.
        buffer_controller.start_download_thread()

        content_type = (self._source_resource.content_type
                        or request_config_factory.DEFAULT_CONTENT_TYPE)

        request_config = request_config_factory.get_request_config(
            self._destination_resource.storage_url,
            content_type=content_type,
            md5_hash=self._get_md5_hash(),
            size=self._source_resource.size,
            user_request_args=self._user_request_args)

        result_resource = None
        try:
            upload_strategy = upload_util.get_upload_strategy(
                api=destination_client,
                object_length=self._source_resource.size)
            result_resource = destination_client.upload_object(
                buffer_controller.readable_stream,
                self._destination_resource,
                request_config,
                source_resource=self._source_resource,
                upload_strategy=upload_strategy)
        except _AbruptShutdownError:
            # Not raising daisy_chain_stream.exception_raised here because we want
            # to wait for the download thread to finish.
            pass
        except Exception as e:  # pylint: disable=broad-except
            # For all other errors raised during upload, we want to make sure
            # that the download thread is terminated before we re-raise. Hence
            # we catch any exception and store it to be re-raised later.
            buffer_controller.shutdown(e)

        buffer_controller.wait_for_download_thread_to_terminate()
        buffer_controller.readable_stream.close()
        if buffer_controller.exception_raised:
            raise buffer_controller.exception_raised

        if result_resource:
            if self._print_created_message:
                log.status.Print('Created: {}'.format(
                    result_resource.storage_url))
            if self._send_manifest_messages:
                manifest_util.send_success_message(
                    task_status_queue,
                    self._source_resource,
                    self._destination_resource,
                    md5_hash=result_resource.md5_hash)

        if self._delete_source:
            return task.Output(
                additional_task_iterators=[[
                    delete_object_task.DeleteObjectTask(
                        self._source_resource.storage_url)
                ]],
                messages=None)
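
The BufferController couples the download and upload threads through a bounded buffer so the whole object never sits in memory. A stripped-down sketch of that producer/consumer arrangement:

import io
import queue
import threading

chunk_queue = queue.Queue(maxsize=4)  # Bounded: download cannot outrun upload.

def download(chunks):
    for chunk in chunks:
        chunk_queue.put(chunk)
    chunk_queue.put(None)  # Sentinel: download complete.

threading.Thread(
    target=download, args=([b'a' * 4, b'b' * 4],), daemon=True).start()

destination = io.BytesIO()
while True:
    chunk = chunk_queue.get()
    if chunk is None:
        break
    destination.write(chunk)
print(destination.getvalue())  # b'aaaabbbb'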