def copy(
        self,
        src,
        dest,
        dest_kms_key_name=None,
        max_bytes_rewritten_per_call=None):
    """Copies a single GCS object from src to dest using the Rewrite API.

    The rewrite is issued repeatedly, feeding the token from each response
    into the next request, until a response reports ``done``.

    Args:
      src: GCS file path pattern in the form gs://<bucket>/<name>.
      dest: GCS file path pattern in the form gs://<bucket>/<name>.
      dest_kms_key_name: Experimental. No backwards compatibility guarantees.
        Encrypt dest with this Cloud KMS key. If None, will use dest bucket
        encryption defaults.
      max_bytes_rewritten_per_call: Experimental. No backwards compatibility
        guarantees. Each rewrite API call will return after these many bytes.
        Used for testing.

    Raises:
      TimeoutError: on timeout.
    """
    source_bucket, source_object = parse_gcs_path(src)
    target_bucket, target_object = parse_gcs_path(dest)

    rewrite_request = storage.StorageObjectsRewriteRequest(
        sourceBucket=source_bucket,
        sourceObject=source_object,
        destinationBucket=target_bucket,
        destinationObject=target_object,
        destinationKmsKeyName=dest_kms_key_name,
        maxBytesRewrittenPerCall=max_bytes_rewritten_per_call)

    result = self.client.objects.Rewrite(rewrite_request)
    while True:
        if result.done:
            break
        _LOGGER.debug(
            'Rewrite progress: %d of %d bytes, %s to %s',
            result.totalBytesRewritten,
            result.objectSize,
            src,
            dest)
        # Feed the token from the latest response into the follow-up call.
        rewrite_request.rewriteToken = result.rewriteToken
        result = self.client.objects.Rewrite(rewrite_request)
        # The callback only sees responses of follow-up calls; the response
        # to the initial call above is never passed to it.
        if self._rewrite_cb is not None:
            self._rewrite_cb(result)

    _LOGGER.debug('Rewrite done: %s to %s', src, dest)
def copy_batch(
        self,
        src_dest_pairs,
        dest_kms_key_name=None,
        max_bytes_rewritten_per_call=None):
    """Copies several GCS objects, issuing the rewrites as batch requests.

    One rewrite request is prepared per (src, dest) pair. Batches are executed
    repeatedly until every pair has a final status: a pair whose rewrite is
    not yet done gets its request updated with the returned rewrite token and
    is included again in the next batch.

    Args:
      src_dest_pairs: list of (src, dest) tuples of gs://<bucket>/<name> files
        paths to copy from src to dest, not to exceed
        MAX_BATCH_OPERATION_SIZE in length. Pairs are used as dict keys and
        set members, so they must be hashable.
      dest_kms_key_name: Experimental. No backwards compatibility guarantees.
        Encrypt dest with this Cloud KMS key. If None, will use dest bucket
        encryption defaults.
      max_bytes_rewritten_per_call: Experimental. No backwards compatibility
        guarantees. Each rewrite call will return after these many bytes.
        Used primarily for testing.

    Returns:
      List of tuples of (src, dest, exception) in the same order as the
      src_dest_pairs argument, where exception is None if the operation
      succeeded or the relevant exception if the operation failed.
    """
    if not src_dest_pairs:
        return []

    # One rewrite request per pair; requests are mutated in place (rewrite
    # token) when a pair needs another round.
    requests_by_pair = {}
    for pair in src_dest_pairs:
        source_bucket, source_object = parse_gcs_path(pair[0])
        target_bucket, target_object = parse_gcs_path(pair[1])
        requests_by_pair[pair] = storage.StorageObjectsRewriteRequest(
            sourceBucket=source_bucket,
            sourceObject=source_object,
            destinationBucket=target_bucket,
            destinationObject=target_object,
            destinationKmsKeyName=dest_kms_key_name,
            maxBytesRewrittenPerCall=max_bytes_rewritten_per_call)

    # Final outcome per pair: None on success, otherwise the exception.
    # A pair absent from this dict still has work outstanding.
    outcomes = {}
    pending = list(set(src_dest_pairs) - set(outcomes))
    while pending:
        batch = BatchApiRequest(
            batch_url=GCS_BATCH_ENDPOINT,
            retryable_codes=retry.SERVER_ERROR_OR_TIMEOUT_CODES,
            response_encoding='utf-8')
        for pair in pending:
            batch.Add(self.client.objects, 'Rewrite', requests_by_pair[pair])
        calls = batch.Execute(self.client._http)  # pylint: disable=protected-access

        for pair, call in zip(pending, calls):
            src, dest = pair
            result = call.response
            # The callback is invoked for every call in the batch, before the
            # error check below.
            if self._rewrite_cb is not None:
                self._rewrite_cb(result)

            if call.is_error:
                failure = call.exception
                # Translate 404 to the appropriate not found exception.
                if (isinstance(failure, HttpError) and
                        failure.status_code == 404):
                    failure = GcsIOError(
                        errno.ENOENT, 'Source file not found: %s' % src)
                outcomes[pair] = failure
                continue

            if result.done:
                _LOGGER.debug('Rewrite done: %s to %s', src, dest)
                outcomes[pair] = None
            else:
                _LOGGER.debug(
                    'Rewrite progress: %d of %d bytes, %s to %s',
                    result.totalBytesRewritten,
                    result.objectSize,
                    src,
                    dest)
                # No outcome is recorded, so the pair stays pending and is
                # resubmitted with the token from this response.
                requests_by_pair[pair].rewriteToken = result.rewriteToken

        pending = list(set(src_dest_pairs) - set(outcomes))

    return [(item[0], item[1], outcomes[item]) for item in src_dest_pairs]