def _copy2(src, dst, metadata=None, retry_params=None):
  """Copy the file content from src to dst.

  Internal use only!

  Args:
    src: /bucket/filename
    dst: /bucket/filename
    metadata: a dict of metadata for this copy. If None, old metadata is
      copied. For example, {'x-goog-meta-foo': 'bar'}.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.

  Raises:
    errors.AuthorizationError: if authorization failed.
    errors.NotFoundError: if an object that's expected to exist doesn't.
  """
  common.validate_file_path(src)
  common.validate_file_path(dst)

  if metadata is None:
    metadata = {}
    copy_meta = 'COPY'
  else:
    copy_meta = 'REPLACE'
  metadata.update({'x-goog-copy-source': src,
                   'x-goog-metadata-directive': copy_meta})

  api = storage_api._get_storage_api(retry_params=retry_params)
  status, resp_headers, content = api.put_object(
      api_utils._quote_filename(dst), headers=metadata)
  errors.check_status(status, [200], src, metadata, resp_headers,
                      body=content)
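
# A minimal usage sketch for _copy2 (internal API), assuming the source
# object exists; the bucket and object names below are illustrative only.
# Passing a metadata dict triggers the 'REPLACE' directive; passing None
# copies the source's metadata unchanged.
def _example_copy2_usage():
  _copy2('/my-bucket/src.txt', '/my-bucket/dst.txt',
         metadata={'x-goog-meta-owner': 'alice'})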
def stat(filename, retry_params=None, _account_id=None):
  """Get GCSFileStat of a Google Cloud storage file.

  Args:
    filename: A Google Cloud Storage filename of form '/bucket/filename'.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.
    _account_id: Internal-use only.

  Returns:
    a GCSFileStat object containing info about this file.

  Raises:
    errors.AuthorizationError: if authorization failed.
    errors.NotFoundError: if an object that's expected to exist doesn't.
  """
  common.validate_file_path(filename)
  api = storage_api._get_storage_api(retry_params=retry_params,
                                     account_id=_account_id)
  status, headers, content = api.head_object(
      api_utils._quote_filename(filename))
  errors.check_status(status, [200], filename, resp_headers=headers,
                      body=content)
  file_stat = common.GCSFileStat(
      filename=filename,
      st_size=common.get_stored_content_length(headers),
      st_ctime=common.http_time_to_posix(headers.get('last-modified')),
      etag=headers.get('etag'),
      content_type=headers.get('content-type'),
      metadata=common.get_metadata(headers))

  return file_stat
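
# A hedged usage sketch for stat(); '/my-bucket/my-file' is illustrative.
# NotFoundError propagates if the object does not exist.
def _example_stat_usage():
  try:
    file_stat = stat('/my-bucket/my-file')
    logging.info('%s: %s bytes, etag=%s',
                 file_stat.filename, file_stat.st_size, file_stat.etag)
  except errors.NotFoundError:
    logging.info('object does not exist')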
def _send_data(self, data, start_offset, file_len):
  """Send the block to the storage service.

  This is a utility method that does not modify self.

  Args:
    data: data to send, as a str.
    start_offset: start offset of the data in relation to the file.
    file_len: an int if this is the last data to append to the file.
      Otherwise '*'.
  """
  headers = {}
  end_offset = start_offset + len(data) - 1
  if data:
    headers['content-range'] = ('bytes %d-%d/%s' %
                                (start_offset, end_offset, file_len))
  else:
    headers['content-range'] = ('bytes */%s' % file_len)

  status, response_headers, content = self._api.put_object(
      self._path_with_token, payload=data, headers=headers)
  if file_len == '*':
    expected = 308
  else:
    expected = 200

  errors.check_status(status, [expected], self._path, headers,
                      response_headers, content,
                      {'upload_path': self._path_with_token})
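
# A self-contained sketch of the Content-Range values _send_data produces,
# for a 512 KB block at offset 0. While the total size is unknown, file_len
# is '*' (GCS answers 308); the final block carries the real total (200).
# The numbers are illustrative only.
def _example_content_range_headers():
  start_offset, data_len = 0, 512 * 1024
  intermediate = 'bytes %d-%d/%s' % (start_offset,
                                     start_offset + data_len - 1, '*')
  final = 'bytes %d-%d/%s' % (start_offset,
                              start_offset + data_len - 1, data_len)
  return intermediate, final  # ('bytes 0-524287/*', 'bytes 0-524287/524288')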
def __iter__(self):
  """Iter over the bucket.

  Yields:
    GCSFileStat: a GCSFileStat for an object in the bucket.
      They are ordered by GCSFileStat.filename.
  """
  total = 0
  max_keys = self._options.get('max-keys')

  while self._get_bucket_fut:
    status, resp_headers, content = self._get_bucket_fut.get_result()
    errors.check_status(status, [200], self._path, resp_headers=resp_headers,
                        body=content, extras=self._options)

    if self._should_get_another_batch(content):
      self._get_bucket_fut = self._api.get_bucket_async(
          self._path + '?' + urllib.urlencode(self._options))
    else:
      self._get_bucket_fut = None

    root = ET.fromstring(content)
    dirs = self._next_dir_gen(root)
    files = self._next_file_gen(root)
    next_file = files.next()
    next_dir = dirs.next()

    while ((max_keys is None or total < max_keys) and
           not (next_file is None and next_dir is None)):
      total += 1
      if next_file is None:
        self._last_yield = next_dir
        next_dir = dirs.next()
      elif next_dir is None:
        self._last_yield = next_file
        next_file = files.next()
      elif next_dir < next_file:
        self._last_yield = next_dir
        next_dir = dirs.next()
      elif next_file < next_dir:
        self._last_yield = next_file
        next_file = files.next()
      else:
        logging.error(
            'Should never reach. next file is %r. next dir is %r.',
            next_file, next_dir)
      if self._new_max_keys:
        self._new_max_keys -= 1
      yield self._last_yield
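
# A hedged sketch of consuming this iterator through the public
# cloudstorage.listbucket() API, which is assumed to construct the bucket
# iterator above; the bucket name and max_keys value are illustrative.
def _example_listbucket_usage():
  import cloudstorage
  for file_stat in cloudstorage.listbucket('/my-bucket', max_keys=10):
    logging.info('%s (%s bytes)', file_stat.filename, file_stat.st_size)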
def __init__(self, api, path, content_type=None, gcs_headers=None):
  """Constructor.

  Args:
    api: A StorageApi instance.
    path: Quoted/escaped path to the object, e.g. /mybucket/myfile
    content_type: Optional content-type; if omitted, the decision is
      delegated to Google Cloud Storage.
    gcs_headers: additional gs headers as a str->str dict, e.g.
      {'x-goog-acl': 'private', 'x-goog-meta-foo': 'foo'}.

  Raises:
    IOError: When this location can not be found.
  """
  assert self._maxrequestsize > self._blocksize
  assert self._maxrequestsize % self._blocksize == 0
  assert self._maxrequestsize >= self._flushsize

  self._api = api
  self._path = path

  self.name = api_utils._unquote_filename(path)
  self.closed = False

  self._buffer = collections.deque()
  self._buffered = 0
  self._written = 0
  self._offset = 0

  headers = {'x-goog-resumable': 'start'}
  if content_type:
    headers['content-type'] = content_type
  if gcs_headers:
    headers.update(gcs_headers)
  status, resp_headers, content = self._api.post_object(path, headers=headers)
  errors.check_status(status, [201], path, headers, resp_headers,
                      body=content)
  loc = resp_headers.get('location')
  if not loc:
    raise IOError('No location header found in 201 response')
  parsed = urlparse.urlparse(loc)
  self._path_with_token = '%s?%s' % (self._path, parsed.query)
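
# A self-contained sketch of how the upload token is extracted from the 201
# response's Location header above; the URL and token are illustrative.
def _example_extract_upload_token():
  import urlparse
  loc = ('https://storage.googleapis.com/mybucket/myfile'
         '?upload_id=tokenABC123')
  parsed = urlparse.urlparse(loc)
  # The object path plus the Location query string becomes _path_with_token.
  return '%s?%s' % ('/mybucket/myfile', parsed.query)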
def _gs_copy(self, src, dst, src_etag=None):  # pragma: no cover
  """Copy |src| file to |dst| optionally checking src ETag.

  Raises cloudstorage.FatalError on precondition error.
  """
  # See cloudstorage.cloudstorage_api._copy2.
  cloudstorage.validate_file_path(src)
  cloudstorage.validate_file_path(dst)
  headers = {
      'x-goog-copy-source': src,
      'x-goog-metadata-directive': 'COPY',
  }
  if src_etag is not None:
    headers['x-goog-copy-source-if-match'] = src_etag
  api = storage_api._get_storage_api(retry_params=self._retry_params)
  status, resp_headers, content = api.put_object(
      api_utils._quote_filename(dst), headers=headers)
  errors.check_status(status, [200], src, headers, resp_headers,
                      body=content)
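
# A hedged sketch of a conditional copy: supply the source's current ETag so
# GCS rejects the copy if the source changed after it was stat'ed. `copier`
# (an instance exposing _gs_copy) and both paths are illustrative.
def _example_conditional_copy(copier):
  import cloudstorage
  src_stat = cloudstorage.stat('/my-bucket/src')
  copier._gs_copy('/my-bucket/src', '/my-bucket/dst',
                  src_etag=src_stat.etag)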
def delete(filename, retry_params=None, _account_id=None):
  """Delete a Google Cloud Storage file.

  Args:
    filename: A Google Cloud Storage filename of form '/bucket/filename'.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.
    _account_id: Internal-use only.

  Raises:
    errors.NotFoundError: if the file doesn't exist prior to deletion.
  """
  api = storage_api._get_storage_api(retry_params=retry_params,
                                     account_id=_account_id)
  common.validate_file_path(filename)
  filename = api_utils._quote_filename(filename)
  status, resp_headers, content = api.delete_object(filename)
  errors.check_status(status, [204], filename, resp_headers=resp_headers,
                      body=content)
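
# A minimal usage sketch for delete(); the path is illustrative. Deleting a
# missing object raises NotFoundError, which a caller may choose to ignore.
def _example_delete_usage():
  try:
    delete('/my-bucket/obsolete-file')
  except errors.NotFoundError:
    pass  # Already gone; treat as success.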
def _get_offset_from_gcs(self):
  """Get the last offset that has been written to GCS.

  This is a utility method that does not modify self.

  Returns:
    an int of the last offset written to GCS by this upload, inclusive.
    -1 means nothing has been written.
  """
  headers = {'content-range': 'bytes */*'}
  status, response_headers, content = self._api.put_object(
      self._path_with_token, headers=headers)
  errors.check_status(status, [308], self._path, headers,
                      response_headers, content,
                      {'upload_path': self._path_with_token})
  val = response_headers.get('range')
  if val is None:
    return -1
  _, offset = val.rsplit('-', 1)
  return int(offset)
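
# A self-contained sketch of how the 308 response's range header is parsed
# into the last committed offset; the header value is illustrative.
def _example_parse_range_header():
  val = 'bytes=0-42'  # GCS reporting that bytes 0..42 have been persisted.
  _, offset = val.rsplit('-', 1)
  return int(offset)  # 42; -1 would mean no range header, nothing written.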
def __init__(self, api, path, buffer_size=DEFAULT_BUFFER_SIZE,
             max_request_size=MAX_REQUEST_SIZE):
  """Constructor.

  Args:
    api: A StorageApi instance.
    path: Quoted/escaped path to the object, e.g. /mybucket/myfile
    buffer_size: buffer size. The ReadBuffer keeps one buffer. But there
      may be a pending future that contains a second buffer. This size
      must not exceed max_request_size.
    max_request_size: Max bytes to request in one urlfetch.
  """
  self._api = api
  self._path = path
  self.name = api_utils._unquote_filename(path)
  self.closed = False

  assert buffer_size <= max_request_size
  self._buffer_size = buffer_size
  self._max_request_size = max_request_size
  self._offset = 0
  self._buffer = _Buffer()
  self._etag = None

  get_future = self._get_segment(0, self._buffer_size, check_response=False)

  status, headers, content = self._api.head_object(path)
  errors.check_status(status, [200], path, resp_headers=headers,
                      body=content)
  self._file_size = long(common.get_stored_content_length(headers))
  self._check_etag(headers.get('etag'))

  self._buffer_future = None

  if self._file_size != 0:
    content, check_response_closure = get_future.get_result()
    check_response_closure()
    self._buffer.reset(content)
    self._request_next_buffer()
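
# A hedged sketch of reading through the public cloudstorage.open() API,
# which is assumed to construct a ReadBuffer like the one above; the path
# and sizes are illustrative.
def _example_read_usage():
  import cloudstorage
  with cloudstorage.open('/my-bucket/my-file', 'r',
                         read_buffer_size=1024 * 1024) as gcs_file:
    first_chunk = gcs_file.read(4096)
  return first_chunk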
def _checker():
  # Deferred response validation: closes over status/headers/content from
  # the enclosing request so the check can run when the caller is ready.
  errors.check_status(status, [200, 206], self._path, headers,
                      resp_headers, body=content)
  self._check_etag(resp_headers.get('etag'))
def _make_api_call(bucket, file_list, destination_file, content_type,
                   retry_params, _account_id):
  """Internal only. Makes the actual compose calls.

  Currently stubbed because the dev server's cloudstorage_stub.py does not
  handle compose requests.
  TODO: When the dev server gets patched, remove the stub.

  Args:
    bucket: Bucket where the files are kept.
    file_list: list of dicts with the file name (see compose argument
      "list_of_files" for format).
    destination_file: Path to the destination file.
    content_type: Content type for the destination file.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.
    _account_id: Internal-use only.
  """
  if len(file_list) == 0:
    raise ValueError("Unable to merge 0 files")
  if len(file_list) == 1:
    _copy2(bucket + file_list[0]["file_name"], destination_file)
    return

  # Needed until cloudstorage_stub.py is updated to accept compose requests.
  # TODO: When patched, remove the dev-server branch of this if.
  if 'development' in os.environ.get('SERVER_SOFTWARE', '').lower():
    # Dev server: emulate compose by concatenating the source files into
    # the destination file.
    with open(destination_file, "w", content_type=content_type) as gcs_merge:
      for source_file in file_list:
        try:
          with open(bucket + source_file['file_name'], "r") as gcs_source:
            gcs_merge.write(gcs_source.read())
        except cloud_errors.NotFoundError:
          logging.warning("File not found %s, skipping",
                          source_file['file_name'])
  else:
    # Production: build a ComposeRequest XML body and issue a compose call.
    xml = ""
    for item in file_list:
      generation = item.get("Generation", "")
      generation_match = item.get("IfGenerationMatch", "")

      if generation != "":
        generation = "<Generation>%s</Generation>" % generation
      if generation_match != "":
        generation_match = ("<IfGenerationMatch>%s</IfGenerationMatch>" %
                            generation_match)

      xml += "<Component><Name>%s</Name>%s%s</Component>" % (
          item["file_name"], generation, generation_match)

    xml = "<ComposeRequest>%s</ComposeRequest>" % xml
    logging.info(xml)

    # pylint: disable=protected-access
    api = cloudstorage.storage_api._get_storage_api(
        retry_params=retry_params, account_id=_account_id)
    headers = {"Content-Type": content_type}
    # pylint: disable=no-member
    status, resp_headers, content = api.put_object(
        cloudstorage.api_utils._quote_filename(destination_file) + "?compose",
        payload=xml,
        headers=headers)
    # TODO: confirm whether [200] is sufficient, or if 204 etc. might be
    # returned.
    cloud_errors.check_status(status, [200], destination_file,
                              resp_headers=resp_headers, body=content)
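
# A self-contained sketch of the ComposeRequest body built above for two
# components, the second carrying a Generation precondition; the names and
# generation number are illustrative only.
def _example_compose_request_xml():
  return ('<ComposeRequest>'
          '<Component><Name>part-1</Name></Component>'
          '<Component><Name>part-2</Name>'
          '<Generation>1360887659323000</Generation></Component>'
          '</ComposeRequest>')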