Example #1
def stat(filename, retry_params=None, _account_id=None):
  """Get GCSFileStat of a Google Cloud storage file.

  Args:
    filename: A Google Cloud Storage filename of form '/bucket/filename'.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.
    _account_id: Internal-use only.

  Returns:
    a GCSFileStat object containing info about this file.

  Raises:
    errors.AuthorizationError: if authorization failed.
    errors.NotFoundError: if an object that's expected to exist doesn't.
  """
  common.validate_file_path(filename)
  api = storage_api._get_storage_api(retry_params=retry_params,
                                     account_id=_account_id)
  status, headers, content = api.head_object(
      api_utils._quote_filename(filename))
  errors.check_status(status, [200], filename, resp_headers=headers,
                      body=content)
  file_stat = common.GCSFileStat(
      filename=filename,
      st_size=common.get_stored_content_length(headers),
      st_ctime=common.http_time_to_posix(headers.get('last-modified')),
      etag=headers.get('etag'),
      content_type=headers.get('content-type'),
      metadata=common.get_metadata(headers))

  return file_stat
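A minimal usage sketch follows; the import and the '/my-bucket/...' path are placeholders, assuming the standard App Engine cloudstorage package:

# Usage sketch (assumptions: the App Engine cloudstorage package is on the
# path; the bucket and object names are placeholders).
import cloudstorage

file_stat = cloudstorage.stat('/my-bucket/some/file.txt')
print file_stat.st_size, file_stat.st_ctime, file_stat.content_type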
Example #4
def open(filename,
         mode='r',
         content_type=None,
         options=None,
         read_buffer_size=storage_api.ReadBuffer.DEFAULT_BUFFER_SIZE,
         retry_params=None,
         _account_id=None):
  """Opens a Google Cloud Storage file and returns it as a File-like object.

  Args:
    filename: A Google Cloud Storage filename of form '/bucket/filename'.
    mode: 'r' for reading mode. 'w' for writing mode.
      In reading mode, the file must exist. In writing mode, a file will
      be created or overwritten.
    content_type: The MIME type of the file. str. Only valid in writing mode.
    options: A str->basestring dict to specify additional headers to pass to
      GCS e.g. {'x-goog-acl': 'private', 'x-goog-meta-foo': 'foo'}.
      Supported options are x-goog-acl, x-goog-meta-, cache-control,
      content-disposition, and content-encoding.
      Only valid in writing mode.
      See https://developers.google.com/storage/docs/reference-headers
      for details.
    read_buffer_size: The buffer size for read. Read keeps a buffer
      and prefetches another one. To minimize blocking for large files,
      always read by buffer size. To minimize number of RPC requests for
      small files, set a large buffer size. Max is 30MB.
    retry_params: An instance of api_utils.RetryParams for subsequent calls
      to GCS from this file handle. If None, the default one is used.
    _account_id: Internal-use only.

  Returns:
    A reading or writing buffer that supports File-like interface. Buffer
    must be closed after operations are done.

  Raises:
    errors.AuthorizationError: if authorization failed.
    errors.NotFoundError: if an object that's expected to exist doesn't.
    ValueError: invalid open mode or if content_type or options are specified
      in reading mode.
  """
  common.validate_file_path(filename)
  api = storage_api._get_storage_api(retry_params=retry_params,
                                     account_id=_account_id)
  filename = api_utils._quote_filename(filename)

  if mode == 'w':
    common.validate_options(options)
    return storage_api.StreamingBuffer(api, filename, content_type, options)
  elif mode == 'r':
    if content_type or options:
      raise ValueError('Options and content_type can only be specified '
                       'for writing mode.')
    return storage_api.ReadBuffer(api,
                                  filename,
                                  buffer_size=read_buffer_size)
  else:
    raise ValueError('Invalid mode %s.' % mode)
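A short write-then-read sketch (placeholder paths; both buffer types support the context-manager protocol, so a with-block closes them):

# Write, then read back. '/my-bucket/demo.txt' is a placeholder path.
with cloudstorage.open('/my-bucket/demo.txt', 'w',
                       content_type='text/plain',
                       options={'x-goog-acl': 'private'}) as gcs_file:
  gcs_file.write('hello world\n')

with cloudstorage.open('/my-bucket/demo.txt', 'r') as gcs_file:
  contents = gcs_file.read()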
Example #7
def delete(filename, retry_params=None, _account_id=None):
  """Delete a Google Cloud Storage file.

  Args:
    filename: A Google Cloud Storage filename of form '/bucket/filename'.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.
    _account_id: Internal-use only.

  Raises:
    errors.NotFoundError: if the file doesn't exist prior to deletion.
  """
  api = storage_api._get_storage_api(retry_params=retry_params,
                                     account_id=_account_id)
  common.validate_file_path(filename)
  filename = api_utils._quote_filename(filename)
  status, resp_headers, content = api.delete_object(filename)
  errors.check_status(status, [204], filename, resp_headers=resp_headers,
                      body=content)
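Since delete raises on a missing object, callers that treat "already gone" as success can swallow the error; a sketch with a placeholder path:

from cloudstorage import errors

try:
  cloudstorage.delete('/my-bucket/demo.txt')  # placeholder path
except errors.NotFoundError:
  pass  # the object was already gone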
Example #8
def testValidatePath(self):
  self.assertRaises(ValueError, common.validate_bucket_path, '/bucke*')
  self.assertRaises(ValueError, common.validate_file_path, None)
  self.assertRaises(ValueError, common.validate_file_path, '/bucketabcd')
  self.assertRaises(TypeError, common.validate_file_path, 1)
  common.validate_file_path('/bucket/file')
  common.validate_file_path('/bucket/dir/dir2/file')
  common.validate_file_path('/bucket/dir/dir2/file' + 'c' * 64)
Example #11
def _copy2(src, dst, metadata=None, retry_params=None):
  """Copy the file content from src to dst.

  Internal use only!

  Args:
    src: /bucket/filename
    dst: /bucket/filename
    metadata: a dict of metadata for this copy. If None, old metadata is copied.
      For example, {'x-goog-meta-foo': 'bar'}.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.

  Raises:
    errors.AuthorizationError: if authorization failed.
    errors.NotFoundError: if an object that's expected to exist doesn't.
  """
  common.validate_file_path(src)
  common.validate_file_path(dst)

  if metadata is None:
    metadata = {}
    copy_meta = 'COPY'
  else:
    copy_meta = 'REPLACE'
  metadata.update({'x-goog-copy-source': src,
                   'x-goog-metadata-directive': copy_meta})

  api = storage_api._get_storage_api(retry_params=retry_params)
  status, resp_headers, content = api.put_object(
      api_utils._quote_filename(dst), headers=metadata)
  errors.check_status(status, [200], src, metadata, resp_headers, body=content)
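A sketch of the two metadata directives (_copy2 is internal-use only; the paths are placeholders):

# COPY directive: dst inherits src's metadata because metadata is None.
_copy2('/my-bucket/src.txt', '/my-bucket/dst.txt')
# REPLACE directive: dst gets exactly the metadata passed here.
_copy2('/my-bucket/src.txt', '/my-bucket/dst-replaced.txt',
       metadata={'x-goog-meta-foo': 'bar'})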
def compose(list_of_files, destination_file, preserve_order=True,
            content_type=None, retry_params=None, _account_id=None):
  """
    Internal only!
    Should only be used when the included cloudstorage lib
      does not contain the compose functionality
    Runs the GCS Compose on the inputed files.
    Merges between 2 and 1024 files into one file.
    Automatically breaks down the files into batches of 32.
    There is an option to sort naturally.
  Args:
    list_of_files: list of dictionaries with the following format:
      {"file_name" : REQUIRED name of the file to be merged. Do not include the bucket name,
       "Generation" : OPTIONAL Used to specify what version of a file to use,
       "IfGenerationMatch" : OPTIONAL Used to fail requests if versions don't match}
    destination_file: Path to the desired output file. Must have the bucket in the path.
    preserve_order: If True, the files are composed in the given order
      and are not sorted into natural order.
    content_type: Used to specify the content-type header of the output.
      If None, it is guessed from the first file.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.
    _account_id: Internal-use only.

  Raises:
    TypeError: If the dictionary for the file list is malformed
    ValueError: If the number of files is outside the range of 2-1024
    errors.NotFoundError: If any element in the file list is missing the "file_name" key
  """
  def _alphanum_key(input_string):
    """
      Internal use only.
      Splits the file name up to allow natural sorting.
    """
    return [int(chunk) if chunk.isdigit() else chunk
            for chunk in re.split('([0-9]+)', input_string)]
  # pylint: disable=too-many-locals
  def _make_api_call(bucket, file_list, destination_file, content_type, retry_params, _account_id):
    """
        Internal Only
        Makes the actual calls.
        Currently stubbed because the dev server cloudstorage_stub.py
          does not handle compose requests.
        TODO: When the dev server gets patch please remove the stub
    Args:
      bucket: Bucket where the files are kept
      file_list: list of dicts with the file name (see compose argument "list_of_files" for format).
      destination_file: Path to the destination file.
      content_type: Content type for the destination file.
      retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.
    _account_id: Internal-use only.
    """
    if len(file_list) == 0:
      raise ValueError("Unable to merge 0 files")
    if len(file_list) == 1:
      _copy2(bucket + file_list[0]["file_name"], destination_file)
      return
    # Needed until cloudstorage_stub.py is updated to accept compose requests.
    # TODO: When patched, remove the development branch of this if.
    if 'development' in os.environ.get('SERVER_SOFTWARE', '').lower():
      # Make the call against the development server.
      with open(destination_file, "w", content_type=content_type) as gcs_merge:
        for source_file in file_list:
          try:
            with open(bucket + source_file['file_name'], "r") as gcs_source:
              gcs_merge.write(gcs_source.read())
          except cloud_errors.NotFoundError:
            logging.warn("File not found %s, skipping", source_file['file_name'])
    else:
      # Make the call against the production server.
      xml = ""
      for item in file_list:
        generation = item.get("Generation", "")
        generation_match = item.get("IfGenerationMatch", "")
        if generation != "":
          generation = "<Generation>%s</Generation>" % generation
        if generation_match != "":
          generation_match = "<IfGenerationMatch>%s</IfGenerationMatch>" % generation_match
        xml += "<Component><Name>%s</Name>%s%s</Component>" % \
                  (item["file_name"], generation, generation_match)
      xml = "<ComposeRequest>%s</ComposeRequest>" % xml
      logging.info(xml)
      # pylint: disable=protected-access
      api = cloudstorage.storage_api._get_storage_api(
          retry_params=retry_params, account_id=_account_id)
      headers = {"Content-Type" : content_type}
      # pylint: disable=no-member
      status, resp_headers, content = api.put_object(
          cloudstorage.api_utils._quote_filename(destination_file) + "?compose",
          payload=xml, headers=headers)
      # TODO: confirm whether [200] is sufficient, or if 204 etc. might be returned?
      cloud_errors.check_status(status, [200], destination_file, resp_headers, body=content)
  # Actual start of the compose call. The helpers above are nested to
  # prevent direct calls to them.
  temp_file_suffix = "____MergeTempFile"
  if not isinstance(list_of_files, list):
    raise TypeError("list_of_files must be a list of dictionaries")
  # Copy the list, since it is passed by reference.
  file_list = list_of_files[:]
  list_len = len(file_list)
  if list_len > 1024:
    raise ValueError(
        "Compose attempted to create composite with too many (%i) components;"
        " limit is 1024." % list_len)
  if list_len <= 1:
    raise ValueError("Compose operation requires at least two components; %i provided." % list_len)

  common.validate_file_path(destination_file)
  bucket = "/" + destination_file.split("/")[1] + "/"
  for source_file in file_list:
    if not isinstance(source_file, dict):
      raise TypeError("Each item of file_list must be dictionary")
    file_name = source_file.get("file_name", None)
    if file_name is None:
      raise cloud_errors.NotFoundError("Each item in file_list must specify a file_name")
    if file_name.startswith(bucket):
      logging.warn("Detected bucket name at the start of the file; "
                   "do not specify the bucket when listing file_names. "
                   "This may cause files to be misread")
    common.validate_file_path(bucket + source_file['file_name'])
  if content_type is None:
    if file_exists(bucket + list_of_files[0]["file_name"]):
      content_type = cloudstorage.stat(bucket + list_of_files[0]["file_name"]).content_type
    else:
      logging.warn("Unable to read first file to divine content type, using text/plain")
      content_type = "text/plain"
  # Sort naturally if the flag is false
  if not preserve_order:
    file_list.sort(key=lambda x: _alphanum_key(x['file_name']))
  # Compose can only handle 32 files at a time, so break the list down into
  # batches of 32. This only needs to happen once, since the file_list size
  # limit is 1024 = 32 * 32.
  temp_list = []  # temporary storage for the filenames that store the merged segments of 32
  if len(file_list) > 32:
    temp_file_counter = 0
    segments_list = [file_list[i:i + 32] for i in range(0, len(file_list), 32)]
    file_list = []
    for segment in segments_list:
      temp_file_name = destination_file + temp_file_suffix + str(temp_file_counter)
      _make_api_call(bucket, segment, temp_file_name, content_type, retry_params, _account_id)
      file_list.append({"file_name" : temp_file_name.replace(bucket, "", 1)})
      temp_file_counter += 1
      temp_list.append(temp_file_name)
  # There will always be 32 or fewer files to merge at this point
  _make_api_call(bucket, file_list, destination_file, content_type, retry_params, _account_id)
  # grab all temp files that were created during the merging of segments of 32
  temp_list = cloudstorage.listbucket(destination_file + temp_file_suffix)
  # delete all the now-unneeded temporary merge-files for the segments of 32 (if any)
  for item in temp_list:
    try:
      cloudstorage.delete(item.filename)
    except cloud_errors.NotFoundError:
      pass
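A hypothetical call (the bucket, file names, and generation value below are placeholders): source names are given relative to the destination bucket, and content_type is guessed from the first file when omitted:

compose([{"file_name": "logs/part-0.txt"},
         {"file_name": "logs/part-1.txt",
          "IfGenerationMatch": "123456789"},  # placeholder generation
         {"file_name": "logs/part-2.txt"}],
        "/my-bucket/logs/merged.txt")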
def _validate_compose_list(destination_file,
                           file_list,
                           files_metadata=None,
                           number_of_files=32):
  """Validates the file_list and merges the file_list, files_metadata.

  Args:
    destination_file: Path to the file (i.e. /destination_bucket/destination_file).
    file_list: List of files to compose, see compose for details.
    files_metadata: Meta details for each file in the file_list.
    number_of_files: Maximum number of files allowed in the list.

  Returns:
    A tuple (list_of_files, bucket):
      list_of_files: Ready-to-use dict version of the list.
      bucket: Bucket name extracted from the file paths.
  """
  common.validate_file_path(destination_file)
  bucket = destination_file[0:(destination_file.index('/', 1) + 1)]
  try:
    if isinstance(file_list, types.StringTypes):
      raise TypeError
    list_len = len(file_list)
  except TypeError:
    raise TypeError('file_list must be a list')

  if list_len > number_of_files:
    raise ValueError('Compose attempted to create composite with too many '
                     '(%i) components; limit is (%i).' %
                     (list_len, number_of_files))
  if list_len <= 1:
    raise ValueError('Compose operation requires at'
                     ' least two components; %i provided.' % list_len)

  if files_metadata is None:
    files_metadata = []
  elif len(files_metadata) > list_len:
    raise ValueError('files_metadata contains more entries (%i)'
                     ' than file_list (%i)' %
                     (len(files_metadata), list_len))
  list_of_files = []
  for source_file, meta_data in itertools.izip_longest(
      file_list, files_metadata):
    if not isinstance(source_file, basestring):
      raise TypeError('Each item of file_list must be a string')
    if source_file.startswith('/'):
      log.warn('Detected a "/" at the start of the file name; '
               'unless the file name itself contains a "/", this '
               'may cause files to be misread')
    if source_file.startswith(bucket):
      log.warn('Detected bucket name at the start of the file name; '
               'do not specify the bucket when listing file_names. '
               'This may cause files to be misread')
    common.validate_file_path(bucket + source_file)

    list_entry = {}

    if meta_data is not None:
      list_entry.update(meta_data)
    list_entry["Name"] = source_file
    list_of_files.append(list_entry)

  return list_of_files, bucket
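A sketch of the validator's contract (placeholder names): each source string becomes a dict keyed by "Name", merged with its files_metadata entry, and the bucket prefix is returned separately:

list_of_files, bucket = _validate_compose_list(
    '/my-bucket/merged.txt',
    ['part-0.txt', 'part-1.txt'],
    files_metadata=[{'Generation': '123'}])
# list_of_files == [{'Name': 'part-0.txt', 'Generation': '123'},
#                   {'Name': 'part-1.txt'}]
# bucket == '/my-bucket/'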