Example #1
0
    def delete_batch(self, paths):
        """Deletes a batch of GCS objects using one batch API request.

        A 404 reported for an individual delete is treated as success, so
        deleting an object that is already gone does not surface an error.

        Args:
          paths: List of GCS file path patterns in the form
            gs://<bucket>/<name>, not to exceed MAX_BATCH_OPERATION_SIZE in
            length.

        Returns:
          List of (path, exception) tuples in the same order as ``paths``.
          The exception is None if the delete succeeded (or the object did
          not exist), otherwise the exception reported for that call.
        """
        if not paths:
            return []

        def _failure_of(call):
            # Map one batch sub-call to the exception to report, or None.
            if not call.is_error:
                return None
            error = call.exception
            # Return success when the file doesn't exist anymore, which keeps
            # the operation idempotent.
            if isinstance(error, HttpError) and error.status_code == 404:
                return None
            return error

        batch = BatchApiRequest(
            batch_url=GCS_BATCH_ENDPOINT,
            retryable_codes=retry.SERVER_ERROR_OR_TIMEOUT_CODES)
        for gcs_path in paths:
            bucket_name, object_name = parse_gcs_path(gcs_path)
            delete_request = storage.StorageObjectsDeleteRequest(
                bucket=bucket_name, object=object_name)
            batch.Add(self.client.objects, 'Delete', delete_request)
        responses = batch.Execute(self.client._http)  # pylint: disable=protected-access

        # Responses are matched to the input paths by position.
        return [(paths[index], _failure_of(call))
                for index, call in enumerate(responses)]
Example #2
0
    def copy_batch(self, src_dest_pairs):
        """Copies a batch of GCS objects using one batch API request.

        Args:
          src_dest_pairs: List of (src, dest) tuples of gs://<bucket>/<name>
            file paths to copy from src to dest, not to exceed
            MAX_BATCH_OPERATION_SIZE in length.

        Returns:
          List of (src, dest, exception) tuples in the same order as
          ``src_dest_pairs``. The exception is None if the copy succeeded,
          otherwise the relevant exception; a 404 is reported as a
          GcsIOError with errno.ENOENT naming the missing source.
        """
        if not src_dest_pairs:
            return []

        batch = BatchApiRequest(
            batch_url=GCS_BATCH_ENDPOINT,
            retryable_codes=retry.SERVER_ERROR_OR_TIMEOUT_CODES)
        for source, destination in src_dest_pairs:
            source_bucket, source_object = parse_gcs_path(source)
            target_bucket, target_object = parse_gcs_path(destination)
            copy_request = storage.StorageObjectsCopyRequest(
                sourceBucket=source_bucket,
                sourceObject=source_object,
                destinationBucket=target_bucket,
                destinationObject=target_object)
            batch.Add(self.client.objects, 'Copy', copy_request)
        responses = batch.Execute(self.client._http)  # pylint: disable=protected-access

        outcomes = []
        # Responses are matched to the input pairs by position.
        for index, call in enumerate(responses):
            source, destination = src_dest_pairs[index]
            if not call.is_error:
                error = None
            else:
                error = call.exception
                # Translate 404 to the appropriate not found exception.
                if isinstance(error, HttpError) and error.status_code == 404:
                    error = GcsIOError(
                        errno.ENOENT, 'Source file not found: %s' % source)
            outcomes.append((source, destination, error))
        return outcomes
Example #3
0
    def copy_batch(self,
                   src_dest_pairs,
                   dest_kms_key_name=None,
                   max_bytes_rewritten_per_call=None):
        """Copies a batch of GCS objects using batched Rewrite requests.

        One rewrite request is built per pair. Batches are issued repeatedly:
        a rewrite that reports it is not done has its request updated with the
        returned rewrite token and is sent again in the next batch, until every
        pair has a final status. If ``self._rewrite_cb`` is not None it is
        called with the response of every call in every batch.

        Args:
          src_dest_pairs: List of (src, dest) tuples of gs://<bucket>/<name>
            file paths to copy from src to dest, not to exceed
            MAX_BATCH_OPERATION_SIZE in length. Pairs are used as dict keys,
            so they must be hashable.
          dest_kms_key_name: Experimental. No backwards compatibility
            guarantees. Encrypt dest with this Cloud KMS key. If None, will
            use dest bucket encryption defaults.
          max_bytes_rewritten_per_call: Experimental. No backwards
            compatibility guarantees. Each rewrite call will return after
            these many bytes. Used primarily for testing.

        Returns:
          List of (src, dest, exception) tuples in the same order as
          ``src_dest_pairs``. The exception is None if the copy succeeded,
          otherwise the relevant exception; a 404 is reported as a
          GcsIOError with errno.ENOENT naming the missing source.
        """
        if not src_dest_pairs:
            return []

        # One rewrite request per pair; mutated in place with the rewrite
        # token whenever a call returns before the rewrite is complete.
        requests_by_pair = {}
        for pair in src_dest_pairs:
            source_bucket, source_object = parse_gcs_path(pair[0])
            target_bucket, target_object = parse_gcs_path(pair[1])
            requests_by_pair[pair] = storage.StorageObjectsRewriteRequest(
                sourceBucket=source_bucket,
                sourceObject=source_object,
                destinationBucket=target_bucket,
                destinationObject=target_object,
                destinationKmsKeyName=dest_kms_key_name,
                maxBytesRewrittenPerCall=max_bytes_rewritten_per_call)

        # Final outcome per pair: None on success, an exception on failure.
        # A pair missing from this dict still has a rewrite in progress.
        outcome_by_pair = {}
        pending = list(set(src_dest_pairs) - set(outcome_by_pair))
        while pending:
            batch = BatchApiRequest(
                batch_url=GCS_BATCH_ENDPOINT,
                retryable_codes=retry.SERVER_ERROR_OR_TIMEOUT_CODES,
                response_encoding='utf-8')
            for pair in pending:
                batch.Add(self.client.objects, 'Rewrite',
                          requests_by_pair[pair])
            calls = batch.Execute(self.client._http)  # pylint: disable=protected-access

            for pair, call in zip(pending, calls):
                src, dest = pair
                response = call.response
                if self._rewrite_cb is not None:
                    self._rewrite_cb(response)

                if call.is_error:
                    error = call.exception
                    # Translate 404 to the appropriate not found exception.
                    if (isinstance(error, HttpError)
                            and error.status_code == 404):
                        error = GcsIOError(
                            errno.ENOENT, 'Source file not found: %s' % src)
                    outcome_by_pair[pair] = error
                    continue

                if response.done:
                    _LOGGER.debug('Rewrite done: %s to %s', src, dest)
                    outcome_by_pair[pair] = None
                    continue

                # Not finished yet: carry the token forward so the next batch
                # resumes this rewrite.
                _LOGGER.debug('Rewrite progress: %d of %d bytes, %s to %s',
                              response.totalBytesRewritten,
                              response.objectSize, src, dest)
                requests_by_pair[pair].rewriteToken = response.rewriteToken

            pending = list(set(src_dest_pairs) - set(outcome_by_pair))

        results = []
        for pair in src_dest_pairs:
            results.append((pair[0], pair[1], outcome_by_pair[pair]))
        return results