Example #1
  def test_RewriteTrackerFile(self):
    """Tests Rewrite tracker file functions."""
    tracker_file_name = GetRewriteTrackerFilePath('bk1', 'obj1', 'bk2', 'obj2',
                                                  self.test_api)
    # Should succeed regardless of whether it exists.
    DeleteTrackerFile(tracker_file_name)
    src_obj_metadata = apitools_messages.Object(
        bucket='bk1', name='obj1', etag='etag1', md5Hash='12345')
    src_obj2_metadata = apitools_messages.Object(
        bucket='bk1', name='obj1', etag='etag2', md5Hash='67890')
    dst_obj_metadata = apitools_messages.Object(
        bucket='bk2', name='obj2')
    rewrite_token = 'token1'
    self.assertIsNone(ReadRewriteTrackerFile(tracker_file_name,
                                             src_obj_metadata))
    rewrite_params_hash = HashRewriteParameters(
        src_obj_metadata, dst_obj_metadata, 'full')
    WriteRewriteTrackerFile(tracker_file_name, rewrite_params_hash,
                            rewrite_token)
    self.assertEqual(
        ReadRewriteTrackerFile(tracker_file_name, rewrite_params_hash),
        rewrite_token)

    # Tracker file for an updated source object (with non-matching etag/md5)
    # should return None.
    rewrite_params_hash2 = HashRewriteParameters(
        src_obj2_metadata, dst_obj_metadata, 'full')

    self.assertIsNone(ReadRewriteTrackerFile(tracker_file_name,
                                             rewrite_params_hash2))
    DeleteTrackerFile(tracker_file_name)
    
  def testWarnIfMvEarlyDeletionChargeApplies(self):
    """Tests that WarnIfEarlyDeletionChargeApplies warns when appropriate."""
    test_logger = logging.Logger('test')
    src_url = StorageUrlFromString('gs://bucket/object')

    # Recent nearline objects should generate a warning.
    for object_time_created in (
        self._PI_DAY, self._PI_DAY - datetime.timedelta(days=29, hours=23)):
      recent_nearline_obj = apitools_messages.Object(
          storageClass='NEARLINE',
          timeCreated=object_time_created)

      with mock.patch.object(test_logger, 'warn') as mocked_warn:
        WarnIfMvEarlyDeletionChargeApplies(src_url, recent_nearline_obj,
                                           test_logger)
        mocked_warn.assert_called_with(
            'Warning: moving %s object %s may incur an early deletion '
            'charge, because the original object is less than %s days old '
            'according to the local system time.', 'nearline',
            src_url.url_string, 30)

    # Recent coldline objects should generate a warning.
    for object_time_created in (
        self._PI_DAY, self._PI_DAY - datetime.timedelta(days=89, hours=23)):
      recent_coldline_obj = apitools_messages.Object(
          storageClass='COLDLINE',
          timeCreated=object_time_created)

      with mock.patch.object(test_logger, 'warn') as mocked_warn:
        WarnIfMvEarlyDeletionChargeApplies(src_url, recent_coldline_obj,
                                           test_logger)
        mocked_warn.assert_called_with(
            'Warning: moving %s object %s may incur an early deletion '
            'charge, because the original object is less than %s days old '
            'according to the local system time.', 'coldline',
            src_url.url_string, 90)

    # Sufficiently old objects should not generate a warning.
    with mock.patch.object(test_logger, 'warn') as mocked_warn:
      old_nearline_obj = apitools_messages.Object(
          storageClass='NEARLINE',
          timeCreated=self._PI_DAY - datetime.timedelta(days=30, seconds=1))
      WarnIfMvEarlyDeletionChargeApplies(src_url, old_nearline_obj, test_logger)
      mocked_warn.assert_not_called()
    with mock.patch.object(test_logger, 'warn') as mocked_warn:
      old_coldline_obj = apitools_messages.Object(
          storageClass='COLDLINE',
          timeCreated=self._PI_DAY - datetime.timedelta(days=90, seconds=1))
      WarnIfMvEarlyDeletionChargeApplies(src_url, old_coldline_obj, test_logger)
      mocked_warn.assert_not_called()

    # Recent standard storage class object should not generate a warning.
    with mock.patch.object(test_logger, 'warn') as mocked_warn:
      recent_standard_obj = apitools_messages.Object(
          storageClass='STANDARD',
          timeCreated=self._PI_DAY)
      WarnIfMvEarlyDeletionChargeApplies(src_url, recent_standard_obj,
                                         test_logger)
      mocked_warn.assert_not_called()
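The test above compares object creation times against self._PI_DAY, a fixed reference timestamp that is not shown in this excerpt. A plausible class-level fixture, assuming the assertions only need a stable "now" to compute offsets from (the exact value here is an illustrative assumption, not taken from the original):

import datetime

# Hypothetical fixture for the test case above; any fixed datetime works
# because the test only uses offsets from it.
_PI_DAY = datetime.datetime(2016, 3, 14, 15, 9, 26)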
Example #3
  def testRetryableErrorMetadataCollection(self):
    """Tests that retryable errors are collected on JSON metadata operations."""
    # Retryable errors will only be collected with the JSON API.
    if self.test_api != ApiSelector.JSON:
      return unittest.skip('Retryable errors are only collected in JSON')

    bucket_uri = self.CreateBucket()
    object_uri = self.CreateObject(bucket_uri=bucket_uri,
                                   object_name='foo', contents=b'bar')
    # Set the command name to rsync in order to collect PerformanceSummary info.
    self.collector.ga_params[metrics._GA_LABEL_MAP['Command Name']] = 'rsync'
    # Generate a JSON API instance to test with, because the RunGsUtil method
    # may use the XML API.
    gsutil_api = GcsJsonApi(BucketStorageUri, logging.getLogger(),
                            RetryableErrorsQueue(), self.default_provider)
    # Don't wait for too many retries or for long periods between retries to
    # avoid long tests.
    gsutil_api.api_client.num_retries = 2
    gsutil_api.api_client.max_retry_wait = 1

    # Throw an error when transferring metadata.
    key = object_uri.get_key()
    src_obj_metadata = apitools_messages.Object(name=key.name,
                                                bucket=key.bucket.name,
                                                contentType=key.content_type)
    dst_obj_metadata = apitools_messages.Object(
        bucket=src_obj_metadata.bucket,
        name=self.MakeTempName('object'),
        contentType=src_obj_metadata.contentType)
    with mock.patch.object(http_wrapper, '_MakeRequestNoRetry',
                           side_effect=socket.error()):
      _TryExceptAndPass(gsutil_api.CopyObject, src_obj_metadata,
                        dst_obj_metadata)
    if six.PY2:
      self.assertEqual(self.collector.retryable_errors['SocketError'], 1)
    else:
      # In PY3, socket.* errors are deprecated aliases for OSError
      self.assertEqual(self.collector.retryable_errors['OSError'], 1)

    # Throw an error when removing a bucket.
    with mock.patch.object(
        http_wrapper, '_MakeRequestNoRetry',
        side_effect=apitools_exceptions.HttpError('unused', 'unused',
                                                  'unused')):
      _TryExceptAndPass(gsutil_api.DeleteObject, bucket_uri.bucket_name,
                        object_uri.object_name)
    self.assertEqual(self.collector.retryable_errors['HttpError'], 1)

    # Check that the number of each kind of retryable error was logged.
    self.assertEqual(
        self.collector.perf_sum_params.num_retryable_network_errors, 1)
    self.assertEqual(
        self.collector.perf_sum_params.num_retryable_service_errors, 1)
Example #4
    def _SetObjectCustomMetadataAttribute(self, provider, bucket_name,
                                          object_name, attr_name, attr_value):
        """Sets a custom metadata attribute for an object.

    Args:
      provider: Provider string for the bucket, e.g. 'gs' or 's3'.
      bucket_name: The name of the bucket the object is in.
      object_name: The name of the object itself.
      attr_name: The name of the custom metadata attribute to set.
      attr_value: The value of the custom metadata attribute to set.

    Returns:
      None
    """
        obj_metadata = apitools_messages.Object()
        obj_metadata.metadata = CreateCustomMetadata({attr_name: attr_value})
        if provider == 'gs':
            self.json_api.PatchObjectMetadata(bucket_name,
                                              object_name,
                                              obj_metadata,
                                              provider=provider)
        else:
            self.xml_api.PatchObjectMetadata(bucket_name,
                                             object_name,
                                             obj_metadata,
                                             provider=provider)
  def SetPOSIXMetadata(self, provider, bucket_name, object_name, atime=None,
                       mtime=None, uid=None, gid=None, mode=None):
    """Sets POSIX metadata for the object."""
    obj_metadata = apitools_messages.Object()
    obj_metadata.metadata = apitools_messages.Object.MetadataValue(
        additionalProperties=[])
    if atime is not None:
      CreateCustomMetadata(entries={ATIME_ATTR: atime},
                           custom_metadata=obj_metadata.metadata)
    if mode is not None:
      CreateCustomMetadata(entries={MODE_ATTR: mode},
                           custom_metadata=obj_metadata.metadata)
    if mtime is not None:
      CreateCustomMetadata(entries={MTIME_ATTR: mtime},
                           custom_metadata=obj_metadata.metadata)
    if uid is not None:
      CreateCustomMetadata(entries={UID_ATTR: uid},
                           custom_metadata=obj_metadata.metadata)
    if gid is not None:
      CreateCustomMetadata(entries={GID_ATTR: gid},
                           custom_metadata=obj_metadata.metadata)
    if provider == 'gs':
      self.json_api.PatchObjectMetadata(bucket_name, object_name, obj_metadata,
                                        provider=provider)
    else:
      self.xml_api.PatchObjectMetadata(bucket_name, object_name, obj_metadata,
                                       provider=provider)
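A minimal usage sketch for the SetPOSIXMetadata helper above, assuming a test-case instance that defines it; the bucket and object names and values are illustrative, not from the original:

# Hedged sketch: store mtime and mode as custom metadata entries on an
# existing GCS object via the JSON API branch of SetPOSIXMetadata.
self.SetPOSIXMetadata('gs', 'my-test-bucket', 'my-object',
                      mtime=1331620800,  # POSIX seconds since the epoch
                      mode='644')        # stored as a string-valued attribute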
Example #6
def ReleaseEventHoldFuncWrapper(cls, name_expansion_result, thread_state=None):
    log_template = 'Releasing Event-Based Hold on %s...'
    object_metadata_update = apitools_messages.Object(eventBasedHold=False)
    cls.ObjectUpdateMetadataFunc(object_metadata_update,
                                 log_template,
                                 name_expansion_result,
                                 thread_state=thread_state)
Example #7
def SetTempHoldFuncWrapper(cls, name_expansion_result, thread_state=None):
    log_template = 'Setting Temporary Hold on %s...'
    object_metadata_update = apitools_messages.Object(temporaryHold=True)
    cls.ObjectUpdateMetadataFunc(object_metadata_update,
                                 log_template,
                                 name_expansion_result,
                                 thread_state=thread_state)
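The two wrappers above differ only in which hold field they set and to what value. A hedged refactoring sketch showing how that pattern could be factored into a small factory (the factory name is hypothetical, not part of gsutil):

def _MakeHoldFuncWrapper(log_template, **hold_fields):
  """Hypothetical factory producing hold wrappers like the two shown above."""
  def _Wrapper(cls, name_expansion_result, thread_state=None):
    object_metadata_update = apitools_messages.Object(**hold_fields)
    cls.ObjectUpdateMetadataFunc(object_metadata_update,
                                 log_template,
                                 name_expansion_result,
                                 thread_state=thread_state)
  return _Wrapper

# Equivalent definitions of the two wrappers shown above:
ReleaseEventHoldFuncWrapper = _MakeHoldFuncWrapper(
    'Releasing Event-Based Hold on %s...', eventBasedHold=False)
SetTempHoldFuncWrapper = _MakeHoldFuncWrapper(
    'Setting Temporary Hold on %s...', temporaryHold=True)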
Example #8
  def RunCommand(self):
    """Command entry point for the compose command."""
    target_url_str = self.args[-1]
    self.args = self.args[:-1]
    target_url = StorageUrlFromString(target_url_str)
    self.CheckProvider(target_url)
    if target_url.HasGeneration():
      raise CommandException('A version-specific URL (%s) cannot be '
                             'the destination for gsutil compose - abort.'
                             % target_url)

    dst_obj_metadata = apitools_messages.Object(name=target_url.object_name,
                                                bucket=target_url.bucket_name)

    components = []
    # Remember the first source object so we can get its content type.
    first_src_url = None
    for src_url_str in self.args:
      if ContainsWildcard(src_url_str):
        src_url_iter = self.WildcardIterator(src_url_str).IterObjects()
      else:
        src_url_iter = [BucketListingObject(StorageUrlFromString(src_url_str))]
      for blr in src_url_iter:
        src_url = blr.storage_url
        self.CheckProvider(src_url)

        if src_url.bucket_name != target_url.bucket_name:
          raise CommandException(
              'GCS does not support inter-bucket composing.')

        if not first_src_url:
          first_src_url = src_url
        src_obj_metadata = (
            apitools_messages.ComposeRequest.SourceObjectsValueListEntry(
                name=src_url.object_name))
        if src_url.HasGeneration():
          src_obj_metadata.generation = src_url.generation
        components.append(src_obj_metadata)
        # Avoid expanding too many components, and sanity check each name
        # expansion result.
        if len(components) > MAX_COMPOSE_ARITY:
          raise CommandException('"compose" called with too many component '
                                 'objects. Limit is %d.' % MAX_COMPOSE_ARITY)

    if not components:
      raise CommandException('"compose" requires at least 1 component object.')

    dst_obj_metadata.contentType = self.gsutil_api.GetObjectMetadata(
        first_src_url.bucket_name, first_src_url.object_name,
        provider=first_src_url.scheme, fields=['contentType']).contentType

    preconditions = PreconditionsFromHeaders(self.headers or {})

    self.logger.info(
        'Composing %s from %d component object(s).',
        target_url, len(components))
    self.gsutil_api.ComposeObject(
        components, dst_obj_metadata, preconditions=preconditions,
        provider=target_url.scheme,
        encryption_tuple=GetEncryptionKeyWrapper(config))
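Stripped of the command plumbing, the core API interaction above reduces to roughly the following sketch; gsutil_api stands for an initialized gsutil Cloud API instance (self.gsutil_api in the command), and the bucket and object names are made up:

# Hedged sketch of the compose call that RunCommand above builds.
components = [
    apitools_messages.ComposeRequest.SourceObjectsValueListEntry(name='part-0'),
    apitools_messages.ComposeRequest.SourceObjectsValueListEntry(name='part-1'),
]
dst_obj_metadata = apitools_messages.Object(bucket='my-bucket',
                                            name='combined-object',
                                            contentType='text/plain')
gsutil_api.ComposeObject(components, dst_obj_metadata, provider='gs')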
Example #9
    def CreateObjectJson(self,
                         contents,
                         bucket_name=None,
                         object_name=None,
                         encryption_key=None):
        """Creates a test object (GCS provider only) using the JSON API.

    Args:
      contents: The contents to write to the object.
      bucket_name: Name of bucket to place the object in. If not
                   specified, a new temporary bucket is created.
      object_name: The name to use for the object. If not specified, a temporary
                   test object name is constructed.
      encryption_key: AES256 encryption key to use when creating the object,
          if any.

    Returns:
      An apitools Object for the created object.
    """
        bucket_name = bucket_name or self.CreateBucketJson().name
        object_name = object_name or self.MakeTempName('obj')
        object_metadata = apitools_messages.Object(
            name=object_name,
            bucket=bucket_name,
            contentType='application/octet-stream')
        encryption_tuple = None
        if encryption_key:
            encryption_tuple = CryptoTuple(encryption_key)
        return self.json_api.UploadObject(cStringIO.StringIO(contents),
                                          object_metadata,
                                          provider='gs',
                                          encryption_tuple=encryption_tuple)
Example #10
  def CreateObjectJson(self, contents, bucket_name=None, object_name=None,
                       encryption_key=None, mtime=None, storage_class=None,
                       gs_idempotent_generation=None, kms_key_name=None):
    """Creates a test object (GCS provider only) using the JSON API.

    Args:
      contents: The contents to write to the object.
      bucket_name: Name of bucket to place the object in. If not specified,
          a new temporary bucket is created. Assumes the given bucket name is
          valid.
      object_name: The name to use for the object. If not specified, a temporary
          test object name is constructed.
      encryption_key: AES256 encryption key to use when creating the object,
          if any.
      mtime: The modification time of the file in POSIX time (seconds since
          UTC 1970-01-01). If not specified, this defaults to the current
          system time.
      storage_class: String representing the storage class to use for the
          object.
      gs_idempotent_generation: For use when overwriting an object for which
          you know the previously uploaded generation. Create GCS object
          idempotently by supplying this generation number as a precondition
          and assuming the current object is correct on precondition failure.
          Defaults to 0 (new object); to disable, set to None.
      kms_key_name: Fully-qualified name of the KMS key that should be used to
          encrypt the object. Note that this is currently only valid for 'gs'
          objects.

    Returns:
      An apitools Object for the created object.
    """
    bucket_name = bucket_name or self.CreateBucketJson().name
    object_name = object_name or self.MakeTempName('obj')
    preconditions = Preconditions(gen_match=gs_idempotent_generation)
    custom_metadata = apitools_messages.Object.MetadataValue(
        additionalProperties=[])
    if mtime is not None:
      CreateCustomMetadata({MTIME_ATTR: mtime}, custom_metadata)
    object_metadata = apitools_messages.Object(
        name=object_name,
        metadata=custom_metadata,
        bucket=bucket_name,
        contentType='application/octet-stream',
        storageClass=storage_class,
        kmsKeyName=kms_key_name)
    encryption_keywrapper = CryptoKeyWrapperFromKey(encryption_key)
    try:
      return self.json_api.UploadObject(
          cStringIO.StringIO(contents),
          object_metadata, provider='gs',
          encryption_tuple=encryption_keywrapper,
          preconditions=preconditions)
    except PreconditionException:
      if gs_idempotent_generation is None:
        raise
      with SetBotoConfigForTest([('GSUtil', 'decryption_key1',
                                  encryption_key)]):
        return self.json_api.GetObjectMetadata(bucket_name, object_name)
Example #11
    def testRetryableErrorMetadataCollection(self):
        """Tests that retryable errors are collected on JSON metadata operations."""
        # Retryable errors will only be collected with the JSON API.
        if self.test_api != ApiSelector.JSON:
            return unittest.skip('Retryable errors are only collected in JSON')

        bucket_uri = self.CreateBucket()
        object_uri = self.CreateObject(bucket_uri=bucket_uri,
                                       object_name='foo',
                                       contents='bar')
        # Generate a JSON API instance because the RunGsUtil method uses the XML
        # API.
        gsutil_api = GcsJsonApi(BucketStorageUri, logging.getLogger(),
                                RetryableErrorsQueue(), self.default_provider)
        # Don't wait for too many retries or for long periods between retries to
        # avoid long tests.
        gsutil_api.api_client.num_retries = 2
        gsutil_api.api_client.max_retry_wait = 1

        # Throw an error when transferring metadata.
        key = object_uri.get_key()
        src_obj_metadata = apitools_messages.Object(
            name=key.name,
            bucket=key.bucket.name,
            contentType=key.content_type)
        dst_obj_metadata = apitools_messages.Object(
            bucket=src_obj_metadata.bucket,
            name=self.MakeTempName('object'),
            contentType=src_obj_metadata.contentType)
        with mock.patch.object(http_wrapper,
                               '_MakeRequestNoRetry',
                               side_effect=socket.error()):
            _TryExceptAndPass(gsutil_api.CopyObject, src_obj_metadata,
                              dst_obj_metadata)
        self.assertEqual(self.collector.retryable_errors['SocketError'], 1)

        # Throw an error when removing a bucket.
        with mock.patch.object(http_wrapper,
                               '_MakeRequestNoRetry',
                               side_effect=ValueError()):
            _TryExceptAndPass(gsutil_api.DeleteObject, bucket_uri.bucket_name,
                              object_uri.object_name)
        self.assertEqual(self.collector.retryable_errors['ValueError'], 1)
Example #12
  def ApplyAclChanges(self, name_expansion_result, thread_state=None):
    """Applies the changes in self.changes to the provided URL.

    Args:
      name_expansion_result: NameExpansionResult describing the target object.
      thread_state: If present, gsutil Cloud API instance to apply the changes.
    """
    if thread_state:
      gsutil_api = thread_state
    else:
      gsutil_api = self.gsutil_api

    url = name_expansion_result.expanded_storage_url

    if url.IsBucket():
      bucket = gsutil_api.GetBucket(url.bucket_name, provider=url.scheme,
                                    fields=['acl', 'metageneration'])
      current_acl = bucket.acl
    elif url.IsObject():
      gcs_object = encoding.JsonToMessage(apitools_messages.Object,
                                          name_expansion_result.expanded_result)
      current_acl = gcs_object.acl
    if not current_acl:
      self._RaiseForAccessDenied(url)

    modification_count = 0
    for change in self.changes:
      modification_count += change.Execute(url, current_acl, 'acl', self.logger)
    if modification_count == 0:
      self.logger.info('No changes to %s', url)
      return

    try:
      if url.IsBucket():
        preconditions = Preconditions(meta_gen_match=bucket.metageneration)
        bucket_metadata = apitools_messages.Bucket(acl=current_acl)
        gsutil_api.PatchBucket(url.bucket_name, bucket_metadata,
                               preconditions=preconditions,
                               provider=url.scheme, fields=['id'])
      else:  # Object
        preconditions = Preconditions(gen_match=gcs_object.generation,
                                      meta_gen_match=gcs_object.metageneration)

        object_metadata = apitools_messages.Object(acl=current_acl)
        gsutil_api.PatchObjectMetadata(
            url.bucket_name, url.object_name, object_metadata,
            preconditions=preconditions, provider=url.scheme,
            generation=url.generation, fields=['id'])
    except BadRequestException as e:
      # Don't retry on bad requests, e.g. invalid email address.
      raise CommandException('Received bad request from server: %s' % str(e))
    except AccessDeniedException:
      self._RaiseForAccessDenied(url)

    self.logger.info('Updated ACL on %s', url)

  def testNoSlashFolderPlaceholder(self, mock_gsutil_api):
    mock_gsutil_api.ListObjects.return_value = iter([
        CloudApi.CsObjectOrPrefix(
            apitools_messages.Object(name='folder_$folder$'),
            CloudApi.CsObjectOrPrefixType.OBJECT)
    ])
    (exp_url, have_existing_dst_container) = ExpandUrlToSingleBlr(
        'gs://test/folder', mock_gsutil_api, 'project_id', False,
        CreateOrGetGsutilLogger('copy_test'))

    self.assertTrue(have_existing_dst_container)
    self.assertEqual(exp_url, StorageUrlFromString('gs://test/folder'))
Example #14
def _GetFileObject(filepath):
    """Returns an apitools Object class with supported file attributes.

  To provide size estimates for local-to-cloud file copies, we need to expose
  the local file's size.

  Args:
    filepath: Path to the file.

  Returns:
    apitools Object with the file's size attribute filled in.
  """
    # TODO: If we are preserving POSIX attributes, we could instead call
    # os.stat() here.
    return apitools_messages.Object(size=os.path.getsize(filepath))
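A short hedged sketch of how such size-only Object messages can feed a copy-size estimate for a batch of local files; the paths are illustrative:

local_paths = ['/tmp/a.txt', '/tmp/b.txt']  # hypothetical local files
total_bytes = sum(_GetFileObject(path).size for path in local_paths)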
    def CreateObjectJson(self,
                         contents,
                         bucket_name=None,
                         object_name=None,
                         encryption_key=None,
                         mtime=None,
                         storage_class=None):
        """Creates a test object (GCS provider only) using the JSON API.

    Args:
      contents: The contents to write to the object.
      bucket_name: Name of bucket to place the object in. If not specified,
          a new temporary bucket is created.
      object_name: The name to use for the object. If not specified, a temporary
          test object name is constructed.
      encryption_key: AES256 encryption key to use when creating the object,
          if any.
      mtime: The modification time of the file in POSIX time (seconds since
          UTC 1970-01-01). If not specified, this defaults to the current
          system time.
      storage_class: String representing the storage class to use for the
          object.

    Returns:
      An apitools Object for the created object.
    """
        bucket_name = bucket_name or self.CreateBucketJson().name
        object_name = object_name or self.MakeTempName('obj')
        custom_metadata = apitools_messages.Object.MetadataValue(
            additionalProperties=[])
        if mtime is not None:
            CreateCustomMetadata({MTIME_ATTR: mtime}, custom_metadata)
        object_metadata = apitools_messages.Object(
            name=object_name,
            metadata=custom_metadata,
            bucket=bucket_name,
            contentType='application/octet-stream',
            storageClass=storage_class)
        encryption_tuple = None
        if encryption_key:
            encryption_tuple = CryptoTuple(encryption_key)
        return self.json_api.UploadObject(cStringIO.StringIO(contents),
                                          object_metadata,
                                          provider='gs',
                                          encryption_tuple=encryption_tuple)
Example #17
  def _RefetchObjectMetadataAndApplyAclChanges(self, url, gsutil_api):
    """Reattempts object ACL changes after a PreconditionException."""
    gcs_object = gsutil_api.GetObjectMetadata(
        url.bucket_name, url.object_name, provider=url.scheme,
        fields=['acl', 'generation', 'metageneration'])
    current_acl = gcs_object.acl

    if self._ApplyAclChangesAndReturnChangeCount(url, current_acl) == 0:
      self.logger.info('No changes to %s', url)
      return

    object_metadata = apitools_messages.Object(acl=current_acl)
    preconditions = Preconditions(gen_match=gcs_object.generation,
                                  meta_gen_match=gcs_object.metageneration)
    gsutil_api.PatchObjectMetadata(
        url.bucket_name, url.object_name, object_metadata,
        preconditions=preconditions, provider=url.scheme,
        generation=gcs_object.generation, fields=['id'])
Example #18
  def ApplyAclChanges(self, name_expansion_result, thread_state=None):
    """Applies the changes in self.changes to the provided URL.

    Args:
      name_expansion_result: NameExpansionResult describing the target object.
      thread_state: If present, gsutil Cloud API instance to apply the changes.
    """
    if thread_state:
      gsutil_api = thread_state
    else:
      gsutil_api = self.gsutil_api

    url = name_expansion_result.expanded_storage_url
    if url.IsBucket():
      bucket = gsutil_api.GetBucket(url.bucket_name, provider=url.scheme,
                                    fields=['acl', 'metageneration'])
      current_acl = bucket.acl
    elif url.IsObject():
      gcs_object = encoding.JsonToMessage(apitools_messages.Object,
                                          name_expansion_result.expanded_result)
      current_acl = gcs_object.acl

    if not current_acl:
      self._RaiseForAccessDenied(url)
    if self._ApplyAclChangesAndReturnChangeCount(url, current_acl) == 0:
      self.logger.info('No changes to %s', url)
      return

    try:
      if url.IsBucket():
        preconditions = Preconditions(meta_gen_match=bucket.metageneration)
        bucket_metadata = apitools_messages.Bucket(acl=current_acl)
        gsutil_api.PatchBucket(url.bucket_name, bucket_metadata,
                               preconditions=preconditions,
                               provider=url.scheme, fields=['id'])
      else:  # Object
        preconditions = Preconditions(gen_match=gcs_object.generation,
                                      meta_gen_match=gcs_object.metageneration)
        object_metadata = apitools_messages.Object(acl=current_acl)
        try:
          gsutil_api.PatchObjectMetadata(
              url.bucket_name, url.object_name, object_metadata,
              preconditions=preconditions, provider=url.scheme,
              generation=url.generation, fields=['id'])
        except PreconditionException as e:
          # Special retry case where we want to do an additional step, the read
          # of the read-modify-write cycle, to fetch the correct object
          # metadata before reattempting ACL changes.
          self._RefetchObjectMetadataAndApplyAclChanges(url, gsutil_api)

      self.logger.info('Updated ACL on %s', url)
    except BadRequestException as e:
      # Don't retry on bad requests, e.g. invalid email address.
      raise CommandException('Received bad request from server: %s' % str(e))
    except AccessDeniedException:
      self._RaiseForAccessDenied(url)
    except PreconditionException as e:
      # For objects, retry attempts should have already been handled.
      if url.IsObject():
        raise CommandException(str(e))
      # For buckets, raise PreconditionException and continue to next retry.
      raise e
Example #19
def ObjectMetadataFromHeaders(headers):
    """Creates object metadata according to the provided headers.

  gsutil -h allows specifying various headers (originally intended
  to be passed to boto in gsutil v3).  For the JSON API to be compatible with
  this option, we need to parse these headers into gsutil_api Object fields.

  Args:
    headers: Dict of headers passed via gsutil -h

  Raises:
    ArgumentException if an invalid header is encountered.

  Returns:
    apitools Object with relevant fields populated from headers.
  """
    obj_metadata = apitools_messages.Object()
    for header, value in headers.items():
        if CACHE_CONTROL_REGEX.match(header):
            obj_metadata.cacheControl = value.strip()
        elif CONTENT_DISPOSITION_REGEX.match(header):
            obj_metadata.contentDisposition = value.strip()
        elif CONTENT_ENCODING_REGEX.match(header):
            obj_metadata.contentEncoding = value.strip()
        elif CONTENT_MD5_REGEX.match(header):
            obj_metadata.md5Hash = value.strip()
        elif CONTENT_LANGUAGE_REGEX.match(header):
            obj_metadata.contentLanguage = value.strip()
        elif CONTENT_TYPE_REGEX.match(header):
            if not value:
                obj_metadata.contentType = DEFAULT_CONTENT_TYPE
            else:
                obj_metadata.contentType = value.strip()
        elif GOOG_API_VERSION_REGEX.match(header):
            # API version is only relevant for XML, ignore and rely on the XML API
            # to add the appropriate version.
            continue
        elif GOOG_GENERATION_MATCH_REGEX.match(header):
            # Preconditions are handled elsewhere, but allow these headers through.
            continue
        elif GOOG_METAGENERATION_MATCH_REGEX.match(header):
            # Preconditions are handled elsewhere, but allow these headers through.
            continue
        else:
            custom_goog_metadata_match = CUSTOM_GOOG_METADATA_REGEX.match(
                header)
            custom_amz_metadata_match = CUSTOM_AMZ_METADATA_REGEX.match(header)
            custom_amz_header_match = CUSTOM_AMZ_HEADER_REGEX.match(header)
            header_key = None
            if custom_goog_metadata_match:
                header_key = custom_goog_metadata_match.group('header_key')
            elif custom_amz_metadata_match:
                header_key = custom_amz_metadata_match.group('header_key')
            elif custom_amz_header_match:
                # If we got here we are guaranteed by the prior statement that this is
                # not an x-amz-meta- header.
                header_key = (S3_HEADER_PREFIX +
                              custom_amz_header_match.group('header_key'))
            if header_key:
                if header_key.lower() == 'x-goog-content-language':
                    # Work around content-language being inserted into custom metadata.
                    continue
                if not obj_metadata.metadata:
                    obj_metadata.metadata = apitools_messages.Object.MetadataValue(
                    )
                if not obj_metadata.metadata.additionalProperties:
                    obj_metadata.metadata.additionalProperties = []
                obj_metadata.metadata.additionalProperties.append(
                    apitools_messages.Object.MetadataValue.AdditionalProperty(
                        key=header_key, value=value))
            else:
                raise ArgumentException('Invalid header specified: %s:%s' %
                                        (header, value))
    return obj_metadata
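A hedged example of the mapping ObjectMetadataFromHeaders performs, using a few header forms the code above recognizes (values are illustrative, and the exact custom-metadata key form depends on CUSTOM_GOOG_METADATA_REGEX):

headers = {
    'Cache-Control': 'no-cache',
    'Content-Type': 'text/plain',
    'x-goog-meta-color': 'blue',  # matched as custom Google metadata
}
obj = ObjectMetadataFromHeaders(headers)
# Per the code above: obj.cacheControl == 'no-cache',
# obj.contentType == 'text/plain', and obj.metadata.additionalProperties
# carries one AdditionalProperty for the x-goog-meta-color entry.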
    def test_FilterExistingComponentsNonVersioned(self):
        """Tests upload with a variety of component states."""
        mock_api = MockCloudApi()
        bucket_name = self.MakeTempName('bucket')
        tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
        tracker_file_lock = CreateLock()

        # dst_obj_metadata used for passing content-type.
        empty_object = apitools_messages.Object()

        # Already uploaded, contents still match, component still used.
        fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1',
                                                       contents='1')
        fpath_uploaded_correctly_url = StorageUrlFromString(
            str(fpath_uploaded_correctly))
        object_uploaded_correctly_url = StorageUrlFromString(
            '%s://%s/%s' %
            (self.default_provider, bucket_name, fpath_uploaded_correctly))
        with open(fpath_uploaded_correctly) as f_in:
            fpath_uploaded_correctly_md5 = CalculateB64EncodedMd5FromContents(
                f_in)
        mock_api.MockCreateObjectWithMetadata(apitools_messages.Object(
            bucket=bucket_name,
            name=fpath_uploaded_correctly,
            md5Hash=fpath_uploaded_correctly_md5),
                                              contents='1')

        args_uploaded_correctly = PerformParallelUploadFileToObjectArgs(
            fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly_url,
            object_uploaded_correctly_url, '', empty_object, tracker_file,
            tracker_file_lock, None)

        # Not yet uploaded, but needed.
        fpath_not_uploaded = self.CreateTempFile(file_name='foo2',
                                                 contents='2')
        fpath_not_uploaded_url = StorageUrlFromString(str(fpath_not_uploaded))
        object_not_uploaded_url = StorageUrlFromString(
            '%s://%s/%s' %
            (self.default_provider, bucket_name, fpath_not_uploaded))
        args_not_uploaded = PerformParallelUploadFileToObjectArgs(
            fpath_not_uploaded, 0, 1, fpath_not_uploaded_url,
            object_not_uploaded_url, '', empty_object, tracker_file,
            tracker_file_lock, None)

        # Already uploaded, but contents no longer match. Even though the contents
        # differ, we don't delete this since the bucket is not versioned and it
        # will be overwritten anyway.
        fpath_wrong_contents = self.CreateTempFile(file_name='foo4',
                                                   contents='4')
        fpath_wrong_contents_url = StorageUrlFromString(
            str(fpath_wrong_contents))
        object_wrong_contents_url = StorageUrlFromString(
            '%s://%s/%s' %
            (self.default_provider, bucket_name, fpath_wrong_contents))
        with open(self.CreateTempFile(contents='_')) as f_in:
            fpath_wrong_contents_md5 = CalculateB64EncodedMd5FromContents(f_in)
        mock_api.MockCreateObjectWithMetadata(apitools_messages.Object(
            bucket=bucket_name,
            name=fpath_wrong_contents,
            md5Hash=fpath_wrong_contents_md5),
                                              contents='1')

        args_wrong_contents = PerformParallelUploadFileToObjectArgs(
            fpath_wrong_contents, 0, 1, fpath_wrong_contents_url,
            object_wrong_contents_url, '', empty_object, tracker_file,
            tracker_file_lock, None)

        # Exists in tracker file, but component object no longer exists.
        fpath_remote_deleted = self.CreateTempFile(file_name='foo5',
                                                   contents='5')
        fpath_remote_deleted_url = StorageUrlFromString(
            str(fpath_remote_deleted))
        args_remote_deleted = PerformParallelUploadFileToObjectArgs(
            fpath_remote_deleted, 0, 1, fpath_remote_deleted_url, '', '',
            empty_object, tracker_file, tracker_file_lock, None)

        # Exists in tracker file and already uploaded, but no longer needed.
        fpath_no_longer_used = self.CreateTempFile(file_name='foo6',
                                                   contents='6')
        with open(fpath_no_longer_used) as f_in:
            file_md5 = CalculateB64EncodedMd5FromContents(f_in)
        mock_api.MockCreateObjectWithMetadata(apitools_messages.Object(
            bucket=bucket_name, name='foo6', md5Hash=file_md5),
                                              contents='6')

        dst_args = {
            fpath_uploaded_correctly: args_uploaded_correctly,
            fpath_not_uploaded: args_not_uploaded,
            fpath_wrong_contents: args_wrong_contents,
            fpath_remote_deleted: args_remote_deleted
        }

        existing_components = [
            ObjectFromTracker(fpath_uploaded_correctly, ''),
            ObjectFromTracker(fpath_wrong_contents, ''),
            ObjectFromTracker(fpath_remote_deleted, ''),
            ObjectFromTracker(fpath_no_longer_used, '')
        ]

        bucket_url = StorageUrlFromString('%s://%s' %
                                          (self.default_provider, bucket_name))

        (components_to_upload, uploaded_components,
         existing_objects_to_delete) = (FilterExistingComponents(
             dst_args, existing_components, bucket_url, mock_api))

        for arg in [
                args_not_uploaded, args_wrong_contents, args_remote_deleted
        ]:
            self.assertTrue(arg in components_to_upload)
        self.assertEqual(1, len(uploaded_components))
        self.assertEqual(args_uploaded_correctly.dst_url.url_string,
                         uploaded_components[0].url_string)
        self.assertEqual(1, len(existing_objects_to_delete))
        no_longer_used_url = StorageUrlFromString(
            '%s://%s/%s' %
            (self.default_provider, bucket_name, fpath_no_longer_used))
        self.assertEqual(no_longer_used_url.url_string,
                         existing_objects_to_delete[0].url_string)
    def test_FilterExistingComponentsVersioned(self):
        """Tests upload with versionined parallel components."""

        mock_api = MockCloudApi()
        bucket_name = self.MakeTempName('bucket')
        mock_api.MockCreateVersionedBucket(bucket_name)

        # dst_obj_metadata used for passing content-type.
        empty_object = apitools_messages.Object()

        tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
        tracker_file_lock = CreateLock()

        # Already uploaded, contents still match, component still used.
        fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1',
                                                       contents='1')
        fpath_uploaded_correctly_url = StorageUrlFromString(
            str(fpath_uploaded_correctly))
        with open(fpath_uploaded_correctly) as f_in:
            fpath_uploaded_correctly_md5 = CalculateB64EncodedMd5FromContents(
                f_in)
        object_uploaded_correctly = mock_api.MockCreateObjectWithMetadata(
            apitools_messages.Object(bucket=bucket_name,
                                     name=fpath_uploaded_correctly,
                                     md5Hash=fpath_uploaded_correctly_md5),
            contents='1')
        object_uploaded_correctly_url = StorageUrlFromString(
            '%s://%s/%s#%s' %
            (self.default_provider, bucket_name, fpath_uploaded_correctly,
             object_uploaded_correctly.generation))
        args_uploaded_correctly = PerformParallelUploadFileToObjectArgs(
            fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly_url,
            object_uploaded_correctly_url,
            object_uploaded_correctly.generation, empty_object, tracker_file,
            tracker_file_lock, None)

        # Duplicate object name in tracker file, but uploaded correctly.
        fpath_duplicate = fpath_uploaded_correctly
        fpath_duplicate_url = StorageUrlFromString(str(fpath_duplicate))
        duplicate_uploaded_correctly = mock_api.MockCreateObjectWithMetadata(
            apitools_messages.Object(bucket=bucket_name,
                                     name=fpath_duplicate,
                                     md5Hash=fpath_uploaded_correctly_md5),
            contents='1')
        duplicate_uploaded_correctly_url = StorageUrlFromString(
            '%s://%s/%s#%s' %
            (self.default_provider, bucket_name, fpath_uploaded_correctly,
             duplicate_uploaded_correctly.generation))
        args_duplicate = PerformParallelUploadFileToObjectArgs(
            fpath_duplicate, 0, 1, fpath_duplicate_url,
            duplicate_uploaded_correctly_url,
            duplicate_uploaded_correctly.generation, empty_object,
            tracker_file, tracker_file_lock, None)

        # Already uploaded, but contents no longer match.
        fpath_wrong_contents = self.CreateTempFile(file_name='foo4',
                                                   contents='4')
        fpath_wrong_contents_url = StorageUrlFromString(
            str(fpath_wrong_contents))
        with open(self.CreateTempFile(contents='_')) as f_in:
            fpath_wrong_contents_md5 = CalculateB64EncodedMd5FromContents(f_in)
        object_wrong_contents = mock_api.MockCreateObjectWithMetadata(
            apitools_messages.Object(bucket=bucket_name,
                                     name=fpath_wrong_contents,
                                     md5Hash=fpath_wrong_contents_md5),
            contents='_')
        wrong_contents_url = StorageUrlFromString(
            '%s://%s/%s#%s' %
            (self.default_provider, bucket_name, fpath_wrong_contents,
             object_wrong_contents.generation))
        args_wrong_contents = PerformParallelUploadFileToObjectArgs(
            fpath_wrong_contents, 0, 1, fpath_wrong_contents_url,
            wrong_contents_url, '', empty_object, tracker_file,
            tracker_file_lock, None)

        dst_args = {
            fpath_uploaded_correctly: args_uploaded_correctly,
            fpath_wrong_contents: args_wrong_contents
        }

        existing_components = [
            ObjectFromTracker(fpath_uploaded_correctly,
                              object_uploaded_correctly_url.generation),
            ObjectFromTracker(fpath_duplicate,
                              duplicate_uploaded_correctly_url.generation),
            ObjectFromTracker(fpath_wrong_contents,
                              wrong_contents_url.generation)
        ]

        bucket_url = StorageUrlFromString('%s://%s' %
                                          (self.default_provider, bucket_name))

        (components_to_upload, uploaded_components,
         existing_objects_to_delete) = (FilterExistingComponents(
             dst_args, existing_components, bucket_url, mock_api))

        self.assertEqual([args_wrong_contents], components_to_upload)
        self.assertEqual(args_uploaded_correctly.dst_url.url_string,
                         uploaded_components[0].url_string)
        expected_to_delete = [(args_wrong_contents.dst_url.object_name,
                               args_wrong_contents.dst_url.generation),
                              (args_duplicate.dst_url.object_name,
                               args_duplicate.dst_url.generation)]
        for uri in existing_objects_to_delete:
            self.assertTrue((uri.object_name,
                             uri.generation) in expected_to_delete)
        self.assertEqual(len(expected_to_delete),
                         len(existing_objects_to_delete))
Example #22
  def CreateObject(self, object_name, contents=''):
    # apitools Object messages have no 'contents' field; the raw contents
    # are carried on the MockObject wrapper instead.
    return self.CreateObjectWithMetadata(
        MockObject(apitools_messages.Object(name=object_name),
                   contents=contents))
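The snippet above passes the raw contents to MockObject rather than to the apitools Object message (which has no contents field); a minimal sketch of the MockObject shape this assumes (illustrative, not the actual gsutil mock_cloud_api code):

class MockObject(object):
  """Assumed shape of the mock object wrapper used above."""

  def __init__(self, root_object, contents=''):
    self.root_object = root_object  # apitools_messages.Object metadata
    self.contents = contents        # raw object contents held by the mock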
Example #23
  def test_one_object_with_L_storage_class_update(self):
    """Tests the JSON storage class update time field."""
    if self.test_api == ApiSelector.XML:
      return unittest.skip(
          'XML API has no concept of storage class update time')
    # Case 1: Create an object message where Storage class update time is the
    # same as Creation time.
    current_time = datetime(2017, 1, 2, 3, 4, 5, 6, tzinfo=None)
    obj_metadata = apitools_messages.Object(
        name='foo', bucket='bar', timeCreated=current_time,
        updated=current_time, timeStorageClassUpdated=current_time,
        etag='12345')
    # Create mock object to point to obj_metadata.
    obj_ref = mock.Mock()
    obj_ref.root_object = obj_metadata
    obj_ref.url_string = 'foo'

    # Print out attributes of object message.
    with CaptureStdout() as output:
      PrintFullInfoAboutObject(obj_ref)
    output = '\n'.join(output)

    # Verify that no Storage class update time field displays since it's the
    # same as Creation time.
    find_stor_update_re = re.compile(
        r'^\s*Storage class update time:+(?P<stor_update_time_val>.+)$',
        re.MULTILINE)
    stor_update_time_match = re.search(find_stor_update_re, output)
    self.assertIsNone(stor_update_time_match)

    # Case 2: Create an object message where Storage class update time differs
    # from Creation time.
    new_update_time = datetime(2017, 2, 3, 4, 5, 6, 7, tzinfo=None)
    obj_metadata2 = apitools_messages.Object(
        name='foo2', bucket='bar2', timeCreated=current_time,
        updated=current_time,
        timeStorageClassUpdated=new_update_time,
        etag='12345')
    # Create mock object to point to obj_metadata2.
    obj_ref2 = mock.Mock()
    obj_ref2.root_object = obj_metadata2
    obj_ref2.url_string = 'foo2'

    # Print out attributes of object message.
    with CaptureStdout() as output2:
      PrintFullInfoAboutObject(obj_ref2)
    output2 = '\n'.join(output2)

    # Verify that Creation time and Storage class update time fields display and
    # are the same as the times set in the object message.
    find_time_created_re = re.compile(
        r'^\s*Creation time:\s+(?P<time_created_val>.+)$',
        re.MULTILINE)
    time_created_match = re.search(find_time_created_re, output2)
    self.assertIsNotNone(time_created_match)
    time_created = time_created_match.group('time_created_val')
    self.assertEqual(time_created, datetime.strftime(
        current_time, '%a, %d %b %Y %H:%M:%S GMT'))
    find_stor_update_re_2 = re.compile(
        r'^\s*Storage class update time:+(?P<stor_update_time_val_2>.+)$',
        re.MULTILINE)
    stor_update_time_match_2 = re.search(find_stor_update_re_2, output2)
    self.assertIsNotNone(stor_update_time_match_2)
    stor_update_time = stor_update_time_match_2.group('stor_update_time_val_2')
    self.assertEqual(stor_update_time, datetime.strftime(
        new_update_time, '%a, %d %b %Y %H:%M:%S GMT'))