Example #1
def stat(filename, retry_params=None, _account_id=None):
  """Get GCSFileStat of a Google Cloud Storage file.

  Args:
    filename: A Google Cloud Storage filename of form '/bucket/filename'.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.
    _account_id: Internal-use only.

  Returns:
    a GCSFileStat object containing info about this file.

  Raises:
    errors.AuthorizationError: if authorization failed.
    errors.NotFoundError: if an object that's expected to exist doesn't.
  """
  common.validate_file_path(filename)
  api = storage_api._get_storage_api(retry_params=retry_params,
                                     account_id=_account_id)
  status, headers, content = api.head_object(
      api_utils._quote_filename(filename))
  errors.check_status(status, [200], filename, resp_headers=headers,
                      body=content)
  file_stat = common.GCSFileStat(
      filename=filename,
      st_size=common.get_stored_content_length(headers),
      st_ctime=common.http_time_to_posix(headers.get('last-modified')),
      etag=headers.get('etag'),
      content_type=headers.get('content-type'),
      metadata=common.get_metadata(headers))

  return file_stat
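
A minimal usage sketch for stat, assuming the public cloudstorage package; the bucket and object name below are hypothetical placeholders:

import cloudstorage as gcs

file_stat = gcs.stat('/my-bucket/file.txt')  # placeholder path
print(file_stat.st_size)       # stored content length, in bytes
print(file_stat.content_type)  # e.g. 'text/plain'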
def _copy2(src, dst, metadata=None, retry_params=None):
  """Copy the file content from src to dst.

  Internal use only!

  Args:
    src: /bucket/filename
    dst: /bucket/filename
    metadata: a dict of metadata for this copy. If None, old metadata is copied.
      For example, {'x-goog-meta-foo': 'bar'}.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.

  Raises:
    errors.AuthorizationError: if authorization failed.
    errors.NotFoundError: if an object that's expected to exist doesn't.
  """
  common.validate_file_path(src)
  common.validate_file_path(dst)

  if metadata is None:
    metadata = {}
    copy_meta = 'COPY'
  else:
    copy_meta = 'REPLACE'
  metadata.update({'x-goog-copy-source': src,
                   'x-goog-metadata-directive': copy_meta})

  api = storage_api._get_storage_api(retry_params=retry_params)
  status, resp_headers, content = api.put_object(
      api_utils._quote_filename(dst), headers=metadata)
  errors.check_status(status, [200], src, metadata, resp_headers, body=content)
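
_copy2 is internal-use only, but here is a hedged sketch of calling it directly; all paths are hypothetical placeholders:

from cloudstorage import cloudstorage_api

# Copy and keep the source object's metadata (x-goog-metadata-directive: COPY).
cloudstorage_api._copy2('/my-bucket/src.txt', '/my-bucket/dst.txt')

# Copy and replace the destination's metadata (directive: REPLACE).
cloudstorage_api._copy2('/my-bucket/src.txt', '/my-bucket/dst.txt',
                        metadata={'x-goog-meta-foo': 'bar'})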
Example #4
def open(filename,
         mode='r',
         content_type=None,
         options=None,
         read_buffer_size=storage_api.ReadBuffer.DEFAULT_BUFFER_SIZE,
         retry_params=None,
         _account_id=None):
  """Opens a Google Cloud Storage file and returns it as a File-like object.

  Args:
    filename: A Google Cloud Storage filename of form '/bucket/filename'.
    mode: 'r' for reading mode. 'w' for writing mode.
      In reading mode, the file must exist. In writing mode, a file will
      be created or overwritten.
    content_type: The MIME type of the file. str. Only valid in writing mode.
    options: A str->basestring dict to specify additional headers to pass to
      GCS e.g. {'x-goog-acl': 'private', 'x-goog-meta-foo': 'foo'}.
      Supported options are x-goog-acl, x-goog-meta-, cache-control,
      content-disposition, and content-encoding.
      Only valid in writing mode.
      See https://developers.google.com/storage/docs/reference-headers
      for details.
    read_buffer_size: The buffer size for read. Read keeps a buffer
      and prefetches another one. To minimize blocking for large files,
      always read by buffer size. To minimize number of RPC requests for
      small files, set a large buffer size. Max is 30MB.
    retry_params: An instance of api_utils.RetryParams for subsequent calls
      to GCS from this file handle. If None, the default one is used.
    _account_id: Internal-use only.

  Returns:
    A reading or writing buffer that supports File-like interface. Buffer
    must be closed after operations are done.

  Raises:
    errors.AuthorizationError: if authorization failed.
    errors.NotFoundError: if an object that's expected to exist doesn't.
    ValueError: invalid open mode or if content_type or options are specified
      in reading mode.
  """
  common.validate_file_path(filename)
  api = storage_api._get_storage_api(retry_params=retry_params,
                                     account_id=_account_id)
  filename = api_utils._quote_filename(filename)

  if mode == 'w':
    common.validate_options(options)
    return storage_api.StreamingBuffer(api, filename, content_type, options)
  elif mode == 'r':
    if content_type or options:
      raise ValueError('Options and content_type can only be specified '
                       'for writing mode.')
    return storage_api.ReadBuffer(api,
                                  filename,
                                  buffer_size=read_buffer_size)
  else:
    raise ValueError('Invalid mode %s.' % mode)
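
A minimal round-trip sketch, assuming the public cloudstorage package; the object path is a hypothetical placeholder. Both buffers support the context manager protocol, which guarantees the required close():

import cloudstorage as gcs

with gcs.open('/my-bucket/demo.txt', 'w', content_type='text/plain',
              options={'x-goog-meta-foo': 'foo'}) as gcs_file:
  gcs_file.write('hello')  # StreamingBuffer

with gcs.open('/my-bucket/demo.txt') as gcs_file:  # mode defaults to 'r'
  data = gcs_file.read()   # ReadBuffer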
def compose(list_of_files,
            destination_file,
            files_metadata=None,
            content_type=None,
            retry_params=None,
            _account_id=None):
  """Runs the GCS Compose on the given files.

  Merges between 2 and 32 files into one file. Composite files may even
  be built from other existing composites, provided that the total
  component count does not exceed 1024. See here for details:
  https://cloud.google.com/storage/docs/composite-objects

  Args:
    list_of_files: List of file name strings with no leading slashes or bucket.
    destination_file: Path to the output file. Must have the bucket in the path.
    files_metadata: Optional, file metadata, order must match list_of_files,
      see link for available options:
      https://cloud.google.com/storage/docs/composite-objects#_Xml
    content_type: Optional, used to specify the content-type header of the
      output file.
    retry_params: Optional, an api_utils.RetryParams for this call to GCS.
      If None, the default one is used.
    _account_id: Internal-use only.

  Raises:
    ValueError: If the number of files is outside the range of 2-32.
  """
  log.warn('Warning: the cloudstorage library is out of date and does '
           'not have the needed functionality. Currently running an out-of-'
           'date stub. Please update your cloudstorage library.')
  api = storage_api._get_storage_api(retry_params=retry_params,
                                     account_id=_account_id)

  # Needed until cloudstorage_stub.py is updated to accept compose requests.
  # TODO([email protected]): When patched, remove the True flow from this if.
  if os.getenv('SERVER_SOFTWARE', '').startswith('Dev'):

    def _temp_func(file_list, destination_file, content_type):
      """Dev server stub; remove when the dev server accepts compose requests."""
      bucket = '/' + destination_file.split('/')[1] + '/'
      with open(destination_file, 'w', content_type=content_type) as gcs_merge:
        for source_file in file_list:
          with open(bucket + source_file['Name'], 'r') as gcs_source:
            gcs_merge.write(gcs_source.read())

    compose_object = _temp_func
  else:
    compose_object = api.compose_object
  file_list, _ = _validate_compose_list(destination_file, list_of_files,
                                        files_metadata, 32)
  compose_object(file_list, destination_file, content_type)
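
A hedged usage sketch, assuming a cloudstorage build that exports compose and a hypothetical bucket; note that the source names carry no bucket or leading slash, while the destination path does:

import cloudstorage as gcs

# 'part1.txt' and 'part2.txt' must already exist in /my-bucket (placeholders).
gcs.compose(['part1.txt', 'part2.txt'], '/my-bucket/merged.txt',
            content_type='text/plain')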
Example #7
  def _gs_copy(self, src, dst, src_etag=None):  # pragma: no cover
    """Copy |src| file to |dst| optionally checking src ETag.

    Raises cloudstorage.FatalError on precondition error.
    """
    # See cloudstorage.cloudstorage_api._copy2.
    cloudstorage.validate_file_path(src)
    cloudstorage.validate_file_path(dst)
    headers = {
      'x-goog-copy-source': src,
      'x-goog-metadata-directive': 'COPY',
    }
    if src_etag is not None:
      headers['x-goog-copy-source-if-match'] = src_etag
    api = storage_api._get_storage_api(retry_params=self._retry_params)
    status, resp_headers, content = api.put_object(
        api_utils._quote_filename(dst), headers=headers)
    errors.check_status(status, [200], src, headers, resp_headers, body=content)
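
A hedged sketch of the ETag precondition in use; 'uploader' stands for a hypothetical instance of the enclosing class, and the paths are placeholders:

import cloudstorage

src = '/my-bucket/src.txt'  # placeholder
src_stat = cloudstorage.stat(src)
# With src_etag set, GCS honors x-goog-copy-source-if-match: the copy only
# succeeds while the source still carries the ETag observed above.
uploader._gs_copy(src, '/my-bucket/dst.txt', src_etag=src_stat.etag)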
def delete(filename, retry_params=None, _account_id=None):
  """Delete a Google Cloud Storage file.

  Args:
    filename: A Google Cloud Storage filename of form '/bucket/filename'.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.
    _account_id: Internal-use only.

  Raises:
    errors.NotFoundError: if the file doesn't exist prior to deletion.
  """
  api = storage_api._get_storage_api(retry_params=retry_params,
                                     account_id=_account_id)
  common.validate_file_path(filename)
  filename = api_utils._quote_filename(filename)
  status, resp_headers, content = api.delete_object(filename)
  errors.check_status(status, [204], filename, resp_headers=resp_headers,
                      body=content)
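
A minimal usage sketch, assuming the public cloudstorage package; the object path is a placeholder, and the except clause makes the delete idempotent:

import cloudstorage as gcs

try:
  gcs.delete('/my-bucket/stale.txt')
except gcs.NotFoundError:
  pass  # already gone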
Example #9
  def remove_all_cached_thumbnail_images(self):
    """Removes all cached thumbnail images from GCS for this YT video."""
    gcs_folder = self.gcs_format.format(gcs_bucket=settings.GCS_BUCKET,
                                        bucket_folder=self.gcs_bucket,
                                        yt_id=self.youtube_id)
    api = storage_api._get_storage_api(None)

    futures = {}
    for file_stat in gcs.listbucket(gcs_folder, delimiter="/"):
      filename = api_utils._quote_filename(file_stat.filename)
      futures[file_stat.filename] = api.delete_object_async(filename)

    for filename, future in futures.items():
      status, resp_headers, content = future.get_result()
      if status != 204:
        logging.error("Could not delete thumbnail %s: %s", filename, content)
      else:
        logging.info("Deleted thumbnail file %s", filename)
Example #13
def listbucket(path_prefix, marker=None, prefix=None, max_keys=None,
               delimiter=None, retry_params=None, _account_id=None):
  """Returns a GCSFileStat iterator over a bucket.

  Optional arguments can limit the result to a subset of files under bucket.

  This function has two modes:
  1. List bucket mode: Lists all files in the bucket without any concept of
     hierarchy. GCS doesn't have real directory hierarchies.
  2. Directory emulation mode: If you specify the 'delimiter' argument,
     it is used as a path separator to emulate a hierarchy of directories.
     In this mode, the "path_prefix" argument should end in the delimiter
     specified (thus designates a logical directory). The logical directory's
     contents, both files and subdirectories, are listed. The names of
     subdirectories returned will end with the delimiter. So listbucket
     can be called with the subdirectory name to list the subdirectory's
     contents.

  Args:
    path_prefix: A Google Cloud Storage path of format "/bucket" or
      "/bucket/prefix". Only objects whose fullpath starts with the
      path_prefix will be returned.
    marker: Another path prefix. Only objects whose fullpath starts
      lexicographically after marker will be returned (exclusive).
    prefix: Deprecated. Use path_prefix.
    max_keys: The limit on the number of objects to return. int.
      For best performance, specify max_keys only if you know how many objects
      you want. Otherwise, this method requests large batches and handles
      pagination for you.
    delimiter: Use to turn on directory mode. str of one or multiple chars
      that your bucket uses as its directory separator.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.
    _account_id: Internal-use only.

  Examples:
    For files "/bucket/a",
              "/bucket/bar/1",
              "/bucket/foo",
              "/bucket/foo/1", "/bucket/foo/2/1", "/bucket/foo/3/1":

    Regular mode:
    listbucket("/bucket/f", marker="/bucket/foo/1")
    will match "/bucket/foo/2/1", "/bucket/foo/3/1".

    Directory mode:
    listbucket("/bucket/", delimiter="/")
    will match "/bucket/a", "/bucket/bar/", "/bucket/foo", "/bucket/foo/".
    listbucket("/bucket/foo/", delimiter="/")
    will match "/bucket/foo/1", "/bucket/foo/2/", "/bucket/foo/3/".

  Returns:
    Regular mode:
    A GCSFileStat iterator over matched files ordered by filename.
    The iterator returns GCSFileStat objects. filename, etag, st_size,
    st_ctime, and is_dir are set.

    Directory emulation mode:
    A GCSFileStat iterator over matched files and directories ordered by
    name. The iterator returns GCSFileStat objects. For directories,
    only the filename and is_dir fields are set.

    The last name yielded can be used as next call's marker.
  """
  if prefix:
    common.validate_bucket_path(path_prefix)
    bucket = path_prefix
  else:
    bucket, prefix = common._process_path_prefix(path_prefix)

  if marker and marker.startswith(bucket):
    marker = marker[len(bucket) + 1:]

  api = storage_api._get_storage_api(retry_params=retry_params,
                                     account_id=_account_id)
  options = {}
  if marker:
    options['marker'] = marker
  if max_keys:
    options['max-keys'] = max_keys
  if prefix:
    options['prefix'] = prefix
  if delimiter:
    options['delimiter'] = delimiter

  return _Bucket(api, bucket, options)
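
A minimal usage sketch of both modes, assuming the public cloudstorage package and a hypothetical bucket:

import cloudstorage as gcs

# Regular mode: every object whose full path starts with the prefix.
for file_stat in gcs.listbucket('/my-bucket/logs'):
  print(file_stat.filename)

# Directory emulation mode: one level deep; subdirectory names end in '/'.
for file_stat in gcs.listbucket('/my-bucket/', delimiter='/'):
  print('%s dir=%s' % (file_stat.filename, file_stat.is_dir))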