Exemple #1
0
  def copy(
      self,
      src,
      dest,
      dest_kms_key_name=None,
      max_bytes_rewritten_per_call=None):
    """Copies a single GCS object from src to dest via the Rewrite API.

    The rewrite is issued repeatedly, each time passing back the rewrite token
    from the previous response, until a response reports that it is done. If
    ``self._rewrite_cb`` is set, it is invoked with every follow-up response
    (the response to the initial call is not passed to it).

    Args:
      src: GCS file path pattern in the form gs://<bucket>/<name>.
      dest: GCS file path pattern in the form gs://<bucket>/<name>.
      dest_kms_key_name: Experimental. No backwards compatibility guarantees.
        Encrypt dest with this Cloud KMS key. If None, will use dest bucket
        encryption defaults.
      max_bytes_rewritten_per_call: Experimental. No backwards compatibility
        guarantees. Each rewrite API call will return after these many bytes.
        Used for testing.

    Raises:
      TimeoutError: on timeout. (Nothing in this body raises it directly.)
    """
    source_bucket, source_object = parse_gcs_path(src)
    target_bucket, target_object = parse_gcs_path(dest)

    rewrite_request = storage.StorageObjectsRewriteRequest(
        sourceBucket=source_bucket,
        sourceObject=source_object,
        destinationBucket=target_bucket,
        destinationObject=target_object,
        destinationKmsKeyName=dest_kms_key_name,
        maxBytesRewrittenPerCall=max_bytes_rewritten_per_call)

    result = self.client.objects.Rewrite(rewrite_request)
    while True:
      if result.done:
        break
      _LOGGER.debug(
          'Rewrite progress: %d of %d bytes, %s to %s',
          result.totalBytesRewritten,
          result.objectSize,
          src,
          dest)
      # Resume the same rewrite: hand the token from the last response back.
      rewrite_request.rewriteToken = result.rewriteToken
      result = self.client.objects.Rewrite(rewrite_request)
      if self._rewrite_cb is None:
        continue
      self._rewrite_cb(result)

    _LOGGER.debug('Rewrite done: %s to %s', src, dest)
Exemple #2
0
    def copy_batch(self,
                   src_dest_pairs,
                   dest_kms_key_name=None,
                   max_bytes_rewritten_per_call=None):
        """Copies several GCS objects using batched Rewrite requests.

        One rewrite request is built per (src, dest) pair. All pairs that do
        not yet have a final status are sent together in a batch; pairs whose
        rewrite is reported as not done are re-sent in the next batch with the
        rewrite token from their last response. If ``self._rewrite_cb`` is
        set, it is invoked with the response of every call in every batch,
        before the call is checked for errors.

        Args:
          src_dest_pairs: list of (src, dest) tuples of gs://<bucket>/<name>
            files paths to copy from src to dest, not to exceed
            MAX_BATCH_OPERATION_SIZE in length. (This method does not check
            the length itself.)
          dest_kms_key_name: Experimental. No backwards compatibility
            guarantees. Encrypt dest with this Cloud KMS key. If None, will
            use dest bucket encryption defaults.
          max_bytes_rewritten_per_call: Experimental. No backwards
            compatibility guarantees. Each rewrite call will return after
            these many bytes. Used primarily for testing.

        Returns:
          List of tuples of (src, dest, exception) in the same order as the
          src_dest_pairs argument, where exception is None if the operation
          succeeded or the relevant exception if the operation failed. An
          HttpError with status code 404 is reported as a GcsIOError with
          errno.ENOENT. An empty or falsy src_dest_pairs yields [].
        """
        if not src_dest_pairs:
            return []

        def build_request(source, target):
            # Build the rewrite request for one source/destination pair.
            source_bucket, source_object = parse_gcs_path(source)
            target_bucket, target_object = parse_gcs_path(target)
            return storage.StorageObjectsRewriteRequest(
                sourceBucket=source_bucket,
                sourceObject=source_object,
                destinationBucket=target_bucket,
                destinationObject=target_object,
                destinationKmsKeyName=dest_kms_key_name,
                maxBytesRewrittenPerCall=max_bytes_rewritten_per_call)

        requests = {
            entry: build_request(entry[0], entry[1])
            for entry in src_dest_pairs
        }

        # Final status per pair: None on success, otherwise the exception.
        outcomes = {}
        pending = list(set(src_dest_pairs) - set(outcomes))
        while pending:
            batch = BatchApiRequest(
                batch_url=GCS_BATCH_ENDPOINT,
                retryable_codes=retry.SERVER_ERROR_OR_TIMEOUT_CODES,
                response_encoding='utf-8')
            for entry in pending:
                batch.Add(self.client.objects, 'Rewrite', requests[entry])
            results = batch.Execute(self.client._http)  # pylint: disable=protected-access

            for entry, call in zip(pending, results):
                src, dest = entry
                response = call.response
                if self._rewrite_cb is not None:
                    self._rewrite_cb(response)

                if not call.is_error:
                    if response.done:
                        _LOGGER.debug('Rewrite done: %s to %s', src, dest)
                        outcomes[entry] = None
                    else:
                        _LOGGER.debug(
                            'Rewrite progress: %d of %d bytes, %s to %s',
                            response.totalBytesRewritten,
                            response.objectSize, src, dest)
                        # No final status yet: the pair stays pending and is
                        # re-sent with the token from this response.
                        requests[entry].rewriteToken = response.rewriteToken
                    continue

                failure = call.exception
                # Translate 404 to the appropriate not found exception.
                if (isinstance(failure, HttpError)
                        and failure.status_code == 404):
                    failure = GcsIOError(
                        errno.ENOENT, 'Source file not found: %s' % src)
                outcomes[entry] = failure

            pending = list(set(src_dest_pairs) - set(outcomes))

        return [(entry[0], entry[1], outcomes[entry])
                for entry in src_dest_pairs]