def execute(self, task_status_queue=None):
        """Copies a cloud object to another cloud location via the API.

        Honors --no-clobber, reports progress and manifest messages, and, for
        move operations, queues deletion of the source object.

        Args:
          task_status_queue: Queue used for progress and manifest messages.

        Returns:
          task.Output with a DeleteObjectTask when the source should be
          deleted (a move); otherwise None.
        """
        client = api_factory.get_api(
            self._source_resource.storage_url.scheme)
        destination_url = self._destination_resource.storage_url

        # Respect --no-clobber: skip the copy if the destination exists.
        if copy_util.check_for_cloud_clobber(
                self._user_request_args, client, self._destination_resource):
            no_clobber_message = copy_util.get_no_clobber_message(
                destination_url)
            log.status.Print(no_clobber_message)
            if self._send_manifest_messages:
                manifest_util.send_skip_message(
                    task_status_queue, self._source_resource,
                    self._destination_resource, no_clobber_message)
            return

        progress_callback = progress_callbacks.FilesAndBytesProgressCallback(
            status_queue=task_status_queue,
            offset=0,
            length=self._source_resource.size,
            source_url=self._source_resource.storage_url,
            destination_url=destination_url,
            operation_name=task_status.OperationName.INTRA_CLOUD_COPYING,
            process_id=os.getpid(),
            thread_id=threading.get_ident(),
        )

        request_config = request_config_factory.get_request_config(
            destination_url,
            decryption_key_hash=self._source_resource.decryption_key_hash,
            user_request_args=self._user_request_args)
        # TODO(b/161900052): Support all of copy_object's parameters.
        copied_resource = client.copy_object(
            self._source_resource,
            self._destination_resource,
            request_config,
            progress_callback=progress_callback)

        if self._print_created_message:
            log.status.Print('Created: {}'.format(copied_resource.storage_url))
        if self._send_manifest_messages:
            manifest_util.send_success_message(
                task_status_queue,
                self._source_resource,
                self._destination_resource,
                md5_hash=copied_resource.md5_hash)

        if not self._delete_source:
            return
        # A copy with source deletion is a move: queue the delete as a
        # follow-up task.
        return task.Output(
            additional_task_iterators=[[
                delete_object_task.DeleteObjectTask(
                    self._source_resource.storage_url)
            ]],
            messages=None)
  def __iter__(self):
    """Yields one copy task per source item.

    Sources that are unsupported or already recorded in the manifest as
    skipped/completed are skipped with a status message. After all sources
    are seen, sends a final workload-estimation update so progress can be
    shown as a fraction of the known totals.

    Yields:
      Copy tasks produced by copy_task_factory.get_copy_task.
    """
    self._raise_error_if_source_matches_destination()

    for source in self._source_name_iterator:
      # Only probe for unsupported object types when skipping was requested.
      if self._skip_unsupported:
        unsupported_type = resource_util.get_unsupported_object_type(
            source.resource)
      else:
        unsupported_type = None

      if unsupported_type or (source.resource.storage_url.url_string
                              in self._already_completed_sources):
        if unsupported_type:
          message = 'Skipping item {} with unsupported object type: {}'.format(
              source.resource.storage_url, unsupported_type.value)
          # Record the skip in the manifest only when one is being written.
          if (self._user_request_args and
              self._user_request_args.manifest_path and
              self._task_status_queue):
            manifest_util.send_skip_message(self._task_status_queue,
                                            source.resource,
                                            self._raw_destination, message)
        else:
          message = ('Skipping item {} because manifest marks it as'
                     ' skipped or completed.').format(
                         source.resource.storage_url)
        log.status.Print(message)
        continue

      destination_resource = self._get_copy_destination(self._raw_destination,
                                                        source)

      source_url = source.resource.storage_url
      destination_url = destination_resource.storage_url
      posix_util.raise_if_source_and_destination_not_valid_for_preserve_posix(
          source_url, destination_url, self._user_request_args)
      # A cloud object cannot be downloaded to a local path that ends with
      # the file system delimiter — that would name a directory, not a file.
      if (isinstance(source.resource, resource_reference.ObjectResource) and
          isinstance(destination_url, storage_url.FileUrl) and
          destination_url.object_name.endswith(destination_url.delimiter)):
        log.debug('Skipping downloading {} to {} since the destination ends in'
                  ' a file system delimiter.'.format(
                      source_url.versionless_url_string,
                      destination_url.versionless_url_string))
        continue

      if (not self._multiple_sources and source_url.versionless_url_string !=
          source.expanded_url.versionless_url_string):
        # Multiple sources have been already validated in __init__.
        # This check is required for cases where recursion has been requested,
        # but there is only one object that needs to be copied over.
        self._raise_if_destination_is_file_url_and_not_a_directory_or_pipe()

      # Keep the generation in the printed URL when copying a specific object
      # version (explicit generation or --all-versions).
      if source.original_url.generation or self._all_versions:
        source_url_string = source_url.url_string
      else:
        source_url_string = source_url.versionless_url_string

      if self._custom_md5_digest:
        source.resource.md5_hash = self._custom_md5_digest

      log.status.Print('Copying {} to {}'.format(
          source_url_string, destination_url.versionless_url_string))
      if self._task_status_queue:
        self._update_workload_estimation(source.resource)

      yield copy_task_factory.get_copy_task(
          source.resource,
          destination_resource,
          do_not_decompress=self._do_not_decompress,
          print_created_message=self._print_created_message,
          shared_stream=self._shared_stream,
          user_request_args=self._user_request_args)

    if (self._task_status_queue and
        (self._total_file_count > 0 or self._total_size > 0)):
      # Show fraction of total copies completed now that we know totals.
      progress_callbacks.workload_estimator_callback(
          self._task_status_queue,
          item_count=self._total_file_count,
          size=self._total_size)
# Example No. 3 (scraped-page separator; original marker: "Ejemplo n.º 3")
    def execute(self, task_status_queue=None):
        """Uploads a local file, as a single or parallel composite upload.

        Small files — and situations where composite uploads are impossible
        (pipes, APIs without compose support, no parallelism) — are uploaded
        inline with one FilePartUploadTask. Larger files are split into
        components uploaded by additional tasks, then composed by a finalize
        task.

        Args:
          task_status_queue: Queue for progress and manifest messages.

        Returns:
          task.Output containing the component-upload and finalize tasks for
          a composite upload, or None for a single transfer.
        """
        destination_provider = self._destination_resource.storage_url.scheme
        # Respect --no-clobber: skip the upload if the destination exists.
        if copy_util.check_for_cloud_clobber(
                self._user_request_args,
                api_factory.get_api(destination_provider),
                self._destination_resource):
            log.status.Print(
                copy_util.get_no_clobber_message(
                    self._destination_resource.storage_url))
            if self._send_manifest_messages:
                manifest_util.send_skip_message(
                    task_status_queue, self._source_resource,
                    self._destination_resource,
                    copy_util.get_no_clobber_message(
                        self._destination_resource.storage_url))
            return

        source_url = self._source_resource.storage_url
        original_source_path = source_url.object_name
        should_gzip_locally = gzip_util.should_gzip_locally(
            getattr(self._user_request_args, 'gzip_settings', None),
            original_source_path)

        if source_url.is_pipe:
            # A pipe has no knowable size and cannot be gzipped up front.
            size = None
            source_path = original_source_path
        else:
            if should_gzip_locally:
                # Upload a temporary gzipped copy instead of the original.
                source_path = gzip_util.get_temporary_gzipped_file(
                    original_source_path)
            else:
                source_path = original_source_path
            size = os.path.getsize(source_path)

        api_capabilties = api_factory.get_capabilities(destination_provider)
        component_count = copy_component_util.get_component_count(
            size,
            properties.VALUES.storage.parallel_composite_upload_component_size.
            Get(),
            # TODO(b/232550921): This is a big no-no. Keep API references out of the
            # task-level. Porting because in the process of solving a major bug.
            gcs_api.MAX_OBJECTS_PER_COMPOSE_CALL)
        # Note: the `source_url.is_pipe` test must come first — `size` is None
        # for pipes and cannot be compared against the threshold.
        should_perform_single_transfer = (
            source_url.is_pipe or size < self._composite_upload_threshold
            or not self._composite_upload_threshold
            or cloud_api.Capability.COMPOSE_OBJECTS not in api_capabilties
            or not task_util.should_use_parallelism() or component_count <= 1)

        if should_perform_single_transfer:
            # Run the single-part upload inline rather than queueing it.
            task_output = file_part_upload_task.FilePartUploadTask(
                self._source_resource,
                self._destination_resource,
                source_path,
                offset=0,
                length=size,
                user_request_args=self._user_request_args).execute(
                    task_status_queue)
            result_resource = task_util.get_first_matching_message_payload(
                task_output.messages, task.Topic.CREATED_RESOURCE)
            if result_resource:
                if self._print_created_message:
                    log.status.Print('Created: {}'.format(
                        result_resource.storage_url))
                if self._send_manifest_messages:
                    manifest_util.send_success_message(
                        task_status_queue,
                        self._source_resource,
                        self._destination_resource,
                        md5_hash=result_resource.md5_hash)

            if should_gzip_locally:
                # Delete temporary gzipped version of source file.
                os.remove(source_path)
            if self._delete_source:
                # Delete original source file (this upload is a move).
                os.remove(self._source_resource.storage_url.object_name)
        else:
            component_offsets_and_lengths = (
                copy_component_util.get_component_offsets_and_lengths(
                    size, component_count))

            # Reuse a prior upload's random component-name prefix (if a
            # tracker file exists) so a resumed upload finds its components.
            tracker_file_path = tracker_file_util.get_tracker_file_path(
                self._destination_resource.storage_url,
                tracker_file_util.TrackerFileType.PARALLEL_UPLOAD,
                source_url=source_url)
            tracker_data = tracker_file_util.read_composite_upload_tracker_file(
                tracker_file_path)

            if tracker_data:
                random_prefix = tracker_data.random_prefix
            else:
                random_prefix = _get_random_prefix()

            tracker_file_util.write_composite_upload_tracker_file(
                tracker_file_path, random_prefix)

            file_part_upload_tasks = []
            for i, (offset,
                    length) in enumerate(component_offsets_and_lengths):

                temporary_component_resource = (
                    copy_component_util.get_temporary_component_resource(
                        self._source_resource, self._destination_resource,
                        random_prefix, i))

                upload_task = file_part_upload_task.FilePartUploadTask(
                    self._source_resource,
                    temporary_component_resource,
                    source_path,
                    offset,
                    length,
                    component_number=i,
                    total_components=len(component_offsets_and_lengths),
                    user_request_args=self._user_request_args)

                file_part_upload_tasks.append(upload_task)

            # The finalize task composes the components and handles cleanup
            # (including source deletion for moves).
            finalize_upload_task = (
                finalize_composite_upload_task.FinalizeCompositeUploadTask(
                    expected_component_count=len(file_part_upload_tasks),
                    source_resource=self._source_resource,
                    destination_resource=self._destination_resource,
                    source_path=source_path,
                    random_prefix=random_prefix,
                    delete_source=self._delete_source,
                    print_created_message=self._print_created_message,
                    user_request_args=self._user_request_args))

            return task.Output(additional_task_iterators=[
                file_part_upload_tasks, [finalize_upload_task]
            ],
                               messages=None)
# Example No. 4 (scraped-page separator; original marker: "Ejemplo n.º 4")
    def execute(self, task_status_queue=None):
        """Copies file by downloading and uploading in parallel.

        A BufferController downloads the source on a separate thread while
        this thread uploads from the shared buffer ("daisy chain"). Upload
        errors are captured so the download thread can be shut down cleanly
        before any exception is re-raised.

        Args:
          task_status_queue: Queue for progress and manifest messages.

        Returns:
          task.Output with a DeleteObjectTask when the source should be
          deleted (a move); otherwise None.
        """
        # TODO (b/168712813): Add option to use the Data Transfer component.
        destination_client = api_factory.get_api(
            self._destination_resource.storage_url.scheme)
        # Respect --no-clobber: skip the copy if the destination exists.
        if copy_util.check_for_cloud_clobber(self._user_request_args,
                                             destination_client,
                                             self._destination_resource):
            log.status.Print(
                copy_util.get_no_clobber_message(
                    self._destination_resource.storage_url))
            if self._send_manifest_messages:
                manifest_util.send_skip_message(
                    task_status_queue, self._source_resource,
                    self._destination_resource,
                    copy_util.get_no_clobber_message(
                        self._destination_resource.storage_url))
            return

        progress_callback = progress_callbacks.FilesAndBytesProgressCallback(
            status_queue=task_status_queue,
            offset=0,
            length=self._source_resource.size,
            source_url=self._source_resource.storage_url,
            destination_url=self._destination_resource.storage_url,
            operation_name=task_status.OperationName.DAISY_CHAIN_COPYING,
            process_id=os.getpid(),
            thread_id=threading.get_ident(),
        )

        buffer_controller = BufferController(
            self._source_resource,
            self._destination_resource.storage_url.scheme,
            self._user_request_args, progress_callback)

        # Perform download in a separate thread so that upload can be performed
        # simultaneously.
        buffer_controller.start_download_thread()

        content_type = (self._source_resource.content_type
                        or request_config_factory.DEFAULT_CONTENT_TYPE)

        request_config = request_config_factory.get_request_config(
            self._destination_resource.storage_url,
            content_type=content_type,
            md5_hash=self._get_md5_hash(),
            size=self._source_resource.size,
            user_request_args=self._user_request_args)

        result_resource = None
        try:
            upload_strategy = upload_util.get_upload_strategy(
                api=destination_client,
                object_length=self._source_resource.size)
            result_resource = destination_client.upload_object(
                buffer_controller.readable_stream,
                self._destination_resource,
                request_config,
                source_resource=self._source_resource,
                upload_strategy=upload_strategy)
        except _AbruptShutdownError:
            # Not raising daisy_chain_stream.exception_raised here because we
            # want to wait for the download thread to finish.
            pass
        except Exception as e:  # pylint: disable=broad-except
            # For all the other errors raised during upload, we want to make
            # sure that the download thread is terminated before we re-raise.
            # Hence we catch any exception and store it to be re-raised later.
            buffer_controller.shutdown(e)

        buffer_controller.wait_for_download_thread_to_terminate()
        buffer_controller.readable_stream.close()
        # Re-raise any error captured during download or upload.
        if buffer_controller.exception_raised:
            raise buffer_controller.exception_raised

        if result_resource:
            if self._print_created_message:
                log.status.Print('Created: {}'.format(
                    result_resource.storage_url))
            if self._send_manifest_messages:
                manifest_util.send_success_message(
                    task_status_queue,
                    self._source_resource,
                    self._destination_resource,
                    md5_hash=result_resource.md5_hash)

        if self._delete_source:
            # A copy with source deletion is a move: queue the delete task.
            return task.Output(additional_task_iterators=[[
                delete_object_task.DeleteObjectTask(
                    self._source_resource.storage_url)
            ]],
                               messages=None)
    def execute(self, task_status_queue=None):
        """Creates appropriate download tasks.

        Either queues a sliced (multi-component) download with a finalize
        step, or performs a single-part download inline to a temporary file
        which is then decompressed/renamed into place.

        Args:
          task_status_queue: Queue for progress and manifest messages.

        Returns:
          task.Output with component/finalize tasks for a sliced download,
          task.Output with a DeleteObjectTask for a completed move, or None.
        """
        destination_url = self._destination_resource.storage_url
        # We need to call os.remove here for two reasons:
        # 1. It saves on disk space during a transfer.
        # 2. os.rename fails if a file exists at the destination. Avoiding this by
        # removing files after a download makes us susceptible to a race condition
        # between two running instances of gcloud storage. See the following PR for
        # more information: https://github.com/GoogleCloudPlatform/gsutil/pull/1202.
        if destination_url.exists():
            if self._user_request_args and self._user_request_args.no_clobber:
                log.status.Print(
                    copy_util.get_no_clobber_message(destination_url))
                if self._send_manifest_messages:
                    manifest_util.send_skip_message(
                        task_status_queue, self._source_resource,
                        self._destination_resource,
                        copy_util.get_no_clobber_message(destination_url))
                return
            os.remove(destination_url.object_name)

        # A temporary file larger than the source cannot be a valid partial
        # download, so any resume data is invalid — start over.
        temporary_download_file_exists = (
            self._temporary_destination_resource.storage_url.exists())
        if temporary_download_file_exists and os.path.getsize(
                self._temporary_destination_resource.storage_url.object_name
        ) > self._source_resource.size:
            self._restart_download()

        if _should_perform_sliced_download(self._source_resource,
                                           self._destination_resource):
            download_component_task_list, finalize_sliced_download_task_list = (
                self._get_sliced_download_tasks())

            _, found_tracker_file = (
                tracker_file_util.read_or_create_download_tracker_file(
                    self._source_resource,
                    self._temporary_destination_resource.storage_url,
                    total_components=len(download_component_task_list),
                ))
            if found_tracker_file:
                log.debug(
                    'Resuming sliced download with {} components.'.format(
                        len(download_component_task_list)))
            else:
                if temporary_download_file_exists:
                    # Component count may have changed, invalidating earlier download.
                    self._restart_download()
                log.debug(
                    'Launching sliced download with {} components.'.format(
                        len(download_component_task_list)))

            # Pre-size the temporary file so components can write at offsets.
            copy_component_util.create_file_if_needed(
                self._source_resource, self._temporary_destination_resource)

            return task.Output(additional_task_iterators=[
                download_component_task_list,
                finalize_sliced_download_task_list,
            ],
                               messages=None)

        # Single-part download, run inline rather than queued.
        part_download_task_output = file_part_download_task.FilePartDownloadTask(
            self._source_resource,
            self._temporary_destination_resource,
            offset=0,
            length=self._source_resource.size,
            do_not_decompress=self._do_not_decompress,
            strategy=self._strategy,
            user_request_args=self._user_request_args,
        ).execute(task_status_queue=task_status_queue)

        temporary_file_url = self._temporary_destination_resource.storage_url
        download_util.decompress_or_rename_file(
            self._source_resource,
            temporary_file_url.object_name,
            destination_url.object_name,
            do_not_decompress_flag=self._do_not_decompress)

        if self._user_request_args and self._user_request_args.system_posix_data:
            posix_util.set_posix_attributes_on_file(
                destination_url.object_name,
                task_util.get_first_matching_message_payload(
                    part_download_task_output.messages,
                    task.Topic.API_DOWNLOAD_RESULT).posix_attributes)

        # For sliced download, cleanup is done in the finalized sliced download task
        # We perform cleanup here for all other types in case some corrupt files
        # were left behind.
        tracker_file_util.delete_download_tracker_files(temporary_file_url)

        if self._print_created_message:
            log.status.Print('Created: {}'.format(destination_url))
        if self._send_manifest_messages:
            manifest_util.send_success_message(
                task_status_queue,
                self._source_resource,
                self._destination_resource,
                md5_hash=task_util.get_first_matching_message_payload(
                    part_download_task_output.messages, task.Topic.MD5))

        if self._delete_source:
            # A download with source deletion is a move: queue the delete task.
            return task.Output(additional_task_iterators=[[
                delete_object_task.DeleteObjectTask(
                    self._source_resource.storage_url),
            ]],
                               messages=None)