def _copy2(src, dst, metadata=None, retry_params=None):
  """Copy the file content from src to dst.

  Internal use only!

  Args:
    src: /bucket/filename
    dst: /bucket/filename
    metadata: a dict of metadata for this copy. If None, old metadata is copied.
      For example, {'x-goog-meta-foo': 'bar'}.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.

  Raises:
    errors.AuthorizationError: if authorization failed.
    errors.NotFoundError: if an object that's expected to exist doesn't.
  """
  common.validate_file_path(src)
  common.validate_file_path(dst)

  if metadata is None:
    metadata = {}
    copy_meta = 'COPY'
  else:
    copy_meta = 'REPLACE'
  metadata.update({'x-goog-copy-source': src,
                   'x-goog-metadata-directive': copy_meta})

  api = storage_api._get_storage_api(retry_params=retry_params)
  status, resp_headers, content = api.put_object(
      api_utils._quote_filename(dst), headers=metadata)
  errors.check_status(status, [200], src, metadata, resp_headers, body=content)
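A minimal usage sketch (the bucket and file names are hypothetical, and _copy2 is internal API, so treat this as illustrative only):

from cloudstorage import cloudstorage_api

# Preserve the source object's metadata (COPY directive):
cloudstorage_api._copy2('/mybucket/src.txt', '/mybucket/dst.txt')

# Replace the metadata wholesale (REPLACE directive):
cloudstorage_api._copy2('/mybucket/src.txt', '/mybucket/dst.txt',
                        metadata={'x-goog-meta-owner': 'alice'})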
Example 2
def stat(filename, retry_params=None, _account_id=None):
  """Get GCSFileStat of a Google Cloud storage file.

  Args:
    filename: A Google Cloud Storage filename of form '/bucket/filename'.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.
    _account_id: Internal-use only.

  Returns:
    a GCSFileStat object containing info about this file.

  Raises:
    errors.AuthorizationError: if authorization failed.
    errors.NotFoundError: if an object that's expected to exist doesn't.
  """
  common.validate_file_path(filename)
  api = storage_api._get_storage_api(retry_params=retry_params,
                                     account_id=_account_id)
  status, headers, content = api.head_object(
      api_utils._quote_filename(filename))
  errors.check_status(status, [200], filename, resp_headers=headers,
                      body=content)
  file_stat = common.GCSFileStat(
      filename=filename,
      st_size=common.get_stored_content_length(headers),
      st_ctime=common.http_time_to_posix(headers.get('last-modified')),
      etag=headers.get('etag'),
      content_type=headers.get('content-type'),
      metadata=common.get_metadata(headers))

  return file_stat
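The public cloudstorage.stat wrapper exposes this function; a short usage sketch with a hypothetical filename:

import cloudstorage

info = cloudstorage.stat('/mybucket/report.csv')
# st_ctime is a POSIX timestamp derived from the Last-Modified header.
print('%s: %d bytes, etag=%s, type=%s' %
      (info.filename, info.st_size, info.etag, info.content_type))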
Example 3
  def _send_data(self, data, start_offset, file_len):
    """Send the block to the storage service.

    This is a utility method that does not modify self.

    Args:
      data: data to send in str.
      start_offset: start offset of the data in relation to the file.
      file_len: an int if this is the last data to append to the file.
        Otherwise '*'.
    """
    headers = {}
    end_offset = start_offset + len(data) - 1

    if data:
      headers['content-range'] = ('bytes %d-%d/%s' %
                                  (start_offset, end_offset, file_len))
    else:
      headers['content-range'] = ('bytes */%s' % file_len)

    status, response_headers, content = self._api.put_object(
        self._path_with_token, payload=data, headers=headers)
    if file_len == '*':
      expected = 308
    else:
      expected = 200
    errors.check_status(status, [expected], self._path, headers,
                        response_headers, content,
                        {'upload_path': self._path_with_token})
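The Content-Range header carries the chunk's inclusive byte range plus the total size ('*' until the final chunk). A standalone sketch of the same arithmetic, with a couple of worked cases:

def _content_range(start_offset, data, file_len):
  # Mirrors the header logic above: an empty payload queries upload
  # status with 'bytes */<len>'; otherwise the range is inclusive.
  if data:
    end_offset = start_offset + len(data) - 1
    return 'bytes %d-%d/%s' % (start_offset, end_offset, file_len)
  return 'bytes */%s' % file_len

assert _content_range(0, 'x' * 256, '*') == 'bytes 0-255/*'
assert _content_range(256, 'x' * 100, 356) == 'bytes 256-355/356'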
Example 5
  def __iter__(self):
    """Iterate over the bucket.

    Yields:
      GCSFileStat: a GCSFileStat for an object in the bucket.
        They are ordered by GCSFileStat.filename.
    """
    total = 0
    max_keys = self._options.get('max-keys')

    while self._get_bucket_fut:
      status, resp_headers, content = self._get_bucket_fut.get_result()
      errors.check_status(status, [200], self._path, resp_headers=resp_headers,
                          body=content, extras=self._options)

      if self._should_get_another_batch(content):
        self._get_bucket_fut = self._api.get_bucket_async(
            self._path + '?' + urllib.urlencode(self._options))
      else:
        self._get_bucket_fut = None

      root = ET.fromstring(content)
      dirs = self._next_dir_gen(root)
      files = self._next_file_gen(root)
      next_file = files.next()
      next_dir = dirs.next()

      while ((max_keys is None or total < max_keys) and
             not (next_file is None and next_dir is None)):
        total += 1
        if next_file is None:
          self._last_yield = next_dir
          next_dir = dirs.next()
        elif next_dir is None:
          self._last_yield = next_file
          next_file = files.next()
        elif next_dir < next_file:
          self._last_yield = next_dir
          next_dir = dirs.next()
        elif next_file < next_dir:
          self._last_yield = next_file
          next_file = files.next()
        else:
          logging.error(
              'Should never reach. next file is %r. next dir is %r.',
              next_file, next_dir)
        if self._new_max_keys:
          self._new_max_keys -= 1
        yield self._last_yield
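This iterator backs the public cloudstorage.listbucket; a usage sketch with hypothetical names (passing delimiter='/' is what makes directory prefixes show up as is_dir entries):

import cloudstorage

for entry in cloudstorage.listbucket('/mybucket/logs',
                                     delimiter='/', max_keys=100):
  if entry.is_dir:
    print('prefix: %s' % entry.filename)
  else:
    print('%s (%d bytes)' % (entry.filename, entry.st_size))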
Example 7
  def __init__(self,
               api,
               path,
               content_type=None,
               gcs_headers=None):
    """Constructor.

    Args:
      api: A StorageApi instance.
      path: Quoted/escaped path to the object, e.g. /mybucket/myfile
      content_type: Optional content-type; if None, the content type is
        delegated to Google Cloud Storage.
      gcs_headers: additional gs headers as a str->str dict, e.g.
        {'x-goog-acl': 'private', 'x-goog-meta-foo': 'foo'}.
    Raises:
      IOError: When this location can not be found.
    """
    assert self._maxrequestsize > self._blocksize
    assert self._maxrequestsize % self._blocksize == 0
    assert self._maxrequestsize >= self._flushsize

    self._api = api
    self._path = path

    self.name = api_utils._unquote_filename(path)
    self.closed = False

    self._buffer = collections.deque()
    self._buffered = 0
    self._written = 0
    self._offset = 0

    headers = {'x-goog-resumable': 'start'}
    if content_type:
      headers['content-type'] = content_type
    if gcs_headers:
      headers.update(gcs_headers)
    status, resp_headers, content = self._api.post_object(path, headers=headers)
    errors.check_status(status, [201], path, headers, resp_headers,
                        body=content)
    loc = resp_headers.get('location')
    if not loc:
      raise IOError('No location header found in 201 response')
    parsed = urlparse.urlparse(loc)
    self._path_with_token = '%s?%s' % (self._path, parsed.query)
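This buffer is what a write-mode cloudstorage.open returns: the constructor POSTs with 'x-goog-resumable: start' and keeps the upload token from the 201 response's Location header for later PUTs. A usage sketch with hypothetical names:

import cloudstorage

with cloudstorage.open('/mybucket/out.bin', 'w',
                       content_type='application/octet-stream',
                       options={'x-goog-acl': 'private'}) as f:
  f.write('payload')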
Example 8
  def _gs_copy(self, src, dst, src_etag=None):  # pragma: no cover
    """Copy |src| file to |dst| optionally checking src ETag.

    Raises cloudstorage.FatalError on precondition error.
    """
    # See cloudstorage.cloudstorage_api._copy2.
    cloudstorage.validate_file_path(src)
    cloudstorage.validate_file_path(dst)
    headers = {
      'x-goog-copy-source': src,
      'x-goog-metadata-directive': 'COPY',
    }
    if src_etag is not None:
      headers['x-goog-copy-source-if-match'] = src_etag
    api = storage_api._get_storage_api(retry_params=self._retry_params)
    status, resp_headers, content = api.put_object(
        api_utils._quote_filename(dst), headers=headers)
    errors.check_status(status, [200], src, headers, resp_headers, body=content)
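A hypothetical call site, showing how src_etag turns the copy into a compare-and-swap: GCS rejects the copy if the source changed after the ETag was read.

import cloudstorage

def backup_config(copier):
  # copier is any object exposing _gs_copy (hypothetical wiring).
  src_stat = cloudstorage.stat('/mybucket/config.json')
  copier._gs_copy('/mybucket/config.json', '/mybucket/config.bak',
                  src_etag=src_stat.etag)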
def delete(filename, retry_params=None, _account_id=None):
  """Delete a Google Cloud Storage file.

  Args:
    filename: A Google Cloud Storage filename of form '/bucket/filename'.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.
    _account_id: Internal-use only.

  Raises:
    errors.NotFoundError: if the file doesn't exist prior to deletion.
  """
  api = storage_api._get_storage_api(retry_params=retry_params,
                                     account_id=_account_id)
  common.validate_file_path(filename)
  filename = api_utils._quote_filename(filename)
  status, resp_headers, content = api.delete_object(filename)
  errors.check_status(status, [204], filename, resp_headers=resp_headers,
                      body=content)
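A usage sketch through the public wrapper, treating deletion as idempotent from the caller's point of view (names hypothetical):

import cloudstorage
from cloudstorage import errors as gcs_errors

try:
  cloudstorage.delete('/mybucket/tmp/scratch.dat')
except gcs_errors.NotFoundError:
  pass  # already gone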
Example 10
  def _get_offset_from_gcs(self):
    """Get the last offset that has been written to GCS.

    This is a utility method that does not modify self.

    Returns:
      an int of the last offset written to GCS by this upload, inclusive.
      -1 means nothing has been written.
    """
    headers = {'content-range': 'bytes */*'}
    status, response_headers, content = self._api.put_object(
        self._path_with_token, headers=headers)
    errors.check_status(status, [308], self._path, headers,
                        response_headers, content,
                        {'upload_path': self._path_with_token})
    val = response_headers.get('range')
    if val is None:
      return -1
    _, offset = val.rsplit('-', 1)
    return int(offset)
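The Range header in a 308 response has the form 'bytes=0-42', meaning bytes 0 through 42 inclusive are committed. A standalone sketch of the same parsing, with worked cases:

def _offset_from_range_header(range_value):
  # No Range header means GCS has persisted nothing yet.
  if range_value is None:
    return -1
  _, offset = range_value.rsplit('-', 1)
  return int(offset)

assert _offset_from_range_header(None) == -1
assert _offset_from_range_header('bytes=0-42') == 42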
Example 11
  def __init__(self,
               api,
               path,
               buffer_size=DEFAULT_BUFFER_SIZE,
               max_request_size=MAX_REQUEST_SIZE):
    """Constructor.

    Args:
      api: A StorageApi instance.
      path: Quoted/escaped path to the object, e.g. /mybucket/myfile
      buffer_size: buffer size. The ReadBuffer keeps
        one buffer. But there may be a pending future that contains
        a second buffer. This size must be less than max_request_size.
      max_request_size: Max bytes to request in one urlfetch.
    """
    self._api = api
    self._path = path
    self.name = api_utils._unquote_filename(path)
    self.closed = False

    assert buffer_size <= max_request_size
    self._buffer_size = buffer_size
    self._max_request_size = max_request_size
    self._offset = 0
    self._buffer = _Buffer()
    self._etag = None

    get_future = self._get_segment(0, self._buffer_size, check_response=False)

    status, headers, content = self._api.head_object(path)
    errors.check_status(status, [200], path, resp_headers=headers, body=content)
    self._file_size = long(common.get_stored_content_length(headers))
    self._check_etag(headers.get('etag'))

    self._buffer_future = None

    if self._file_size != 0:
      content, check_response_closure = get_future.get_result()
      check_response_closure()
      self._buffer.reset(content)
      self._request_next_buffer()
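This buffer is what a read-mode cloudstorage.open returns: the first segment is prefetched while a HEAD request fetches the size and ETag. A usage sketch with hypothetical names:

import cloudstorage

with cloudstorage.open('/mybucket/data.csv', 'r',
                       read_buffer_size=1024 * 1024) as f:
  header_line = f.readline()
  rest = f.read()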
Example 15
  def _checker():
    errors.check_status(status, [200, 206], self._path, headers,
                        resp_headers, body=content)
    self._check_etag(resp_headers.get('etag'))
def _make_api_call(bucket, file_list, destination_file, content_type,
                   retry_params, _account_id):
  """Internal only. Makes the actual compose call.

  Currently stubbed on the dev server because cloudstorage_stub.py
  does not handle compose requests.
  TODO: When the dev server gets patched, remove the stub.

  Args:
    bucket: Bucket where the files are kept.
    file_list: list of dicts with the file name (see compose argument
      "list_of_files" for format).
    destination_file: Path to the destination file.
    content_type: Content type for the destination file.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.
    _account_id: Internal-use only.
  """
  if len(file_list) == 0:
    raise ValueError('Unable to merge 0 files')
  if len(file_list) == 1:
    _copy2(bucket + file_list[0]['file_name'], destination_file)
    return

  # Needed until cloudstorage_stub.py is updated to accept compose requests.
  # TODO: When patched, remove the True flow from this if.
  if 'development' in os.environ.get('SERVER_SOFTWARE', '').lower():
    # Development server: concatenate the sources by hand.
    with open(destination_file, 'w', content_type=content_type) as gcs_merge:
      for source_file in file_list:
        try:
          with open(bucket + source_file['file_name'], 'r') as gcs_source:
            gcs_merge.write(gcs_source.read())
        except cloud_errors.NotFoundError:
          logging.warning('File not found %s, skipping',
                          source_file['file_name'])
  else:
    # Production server: issue a real compose request.
    xml = ''
    for item in file_list:
      generation = item.get('Generation', '')
      generation_match = item.get('IfGenerationMatch', '')
      if generation != '':
        generation = '<Generation>%s</Generation>' % generation
      if generation_match != '':
        generation_match = ('<IfGenerationMatch>%s</IfGenerationMatch>' %
                            generation_match)
      xml += '<Component><Name>%s</Name>%s%s</Component>' % (
          item['file_name'], generation, generation_match)
    xml = '<ComposeRequest>%s</ComposeRequest>' % xml
    logging.info(xml)
    # pylint: disable=protected-access
    api = cloudstorage.storage_api._get_storage_api(
        retry_params=retry_params, account_id=_account_id)
    headers = {'Content-Type': content_type}
    # pylint: disable=no-member
    status, resp_headers, content = api.put_object(
        cloudstorage.api_utils._quote_filename(destination_file) + '?compose',
        payload=xml,
        headers=headers)
    # TODO: confirm whether [200] is sufficient, or if 204 etc. might be
    # returned.
    cloud_errors.check_status(status, [200], destination_file,
                              resp_headers=resp_headers, body=content)
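A hypothetical call, matching the file_list shape the XML builder above expects (bucket-relative names starting with '/', optional generation preconditions):

_make_api_call(
    bucket='/mybucket',
    file_list=[{'file_name': '/part-0001'},
               {'file_name': '/part-0002', 'IfGenerationMatch': '12345'}],
    destination_file='/mybucket/merged.log',
    content_type='text/plain',
    retry_params=None,
    _account_id=None)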