Example #1
0
 def testInvalidCSEKConfigurationRaises(self):
   """A malformed base64 CSEK value should raise CommandException."""
   malformed_csek = 'aP7KbmxLqDw1SWHeKvlfKOVgNRNNZc8L2sFz8ybLN==='
   with self.assertRaises(CommandException) as cm:
     CryptoKeyWrapperFromKey(malformed_csek)
   expected_fragment = (
       'Configured encryption_key or decryption_key looked like a CSEK')
   self.assertIn(expected_fragment, cm.exception.reason)
Example #2
0
  def CreateObjectJson(self, contents, bucket_name=None, object_name=None,
                       encryption_key=None, mtime=None, storage_class=None,
                       gs_idempotent_generation=None, kms_key_name=None):
    """Creates a test object (GCS provider only) using the JSON API.

    Args:
      contents: The contents to write to the object.
      bucket_name: Name of bucket to place the object in. If not specified,
          a new temporary bucket is created. Assumes the given bucket name is
          valid.
      object_name: The name to use for the object. If not specified, a temporary
          test object name is constructed.
      encryption_key: AES256 encryption key to use when creating the object,
          if any.
      mtime: The modification time of the file in POSIX time (seconds since
          UTC 1970-01-01). If not specified, this defaults to the current
          system time.
      storage_class: String representing the storage class to use for the
          object.
      gs_idempotent_generation: For use when overwriting an object for which
          you know the previously uploaded generation. Create GCS object
          idempotently by supplying this generation number as a precondition
          and assuming the current object is correct on precondition failure.
          If None (the default), no generation precondition is applied and the
          upload is not idempotent.
      kms_key_name: Fully-qualified name of the KMS key that should be used to
          encrypt the object. Note that this is currently only valid for 'gs'
          objects.

    Returns:
      An apitools Object for the created object.
    """
    bucket_name = bucket_name or self.CreateBucketJson().name
    object_name = object_name or self.MakeTempName('obj')
    # gen_match=None disables the generation precondition entirely.
    preconditions = Preconditions(gen_match=gs_idempotent_generation)
    custom_metadata = apitools_messages.Object.MetadataValue(
        additionalProperties=[])
    if mtime is not None:
      CreateCustomMetadata({MTIME_ATTR: mtime}, custom_metadata)
    object_metadata = apitools_messages.Object(
        name=object_name,
        metadata=custom_metadata,
        bucket=bucket_name,
        contentType='application/octet-stream',
        storageClass=storage_class,
        kmsKeyName=kms_key_name)
    # Yields None when encryption_key is None; UploadObject accepts that.
    encryption_keywrapper = CryptoKeyWrapperFromKey(encryption_key)
    try:
      return self.json_api.UploadObject(
          cStringIO.StringIO(contents),
          object_metadata, provider='gs',
          encryption_tuple=encryption_keywrapper,
          preconditions=preconditions)
    except PreconditionException:
      if gs_idempotent_generation is None:
        raise
      # Precondition failed, so an object already exists at the expected
      # generation; return its metadata instead, supplying the key that would
      # be needed to read a CSEK-encrypted object.
      with SetBotoConfigForTest([('GSUtil', 'decryption_key1',
                                  encryption_key)]):
        return self.json_api.GetObjectMetadata(bucket_name, object_name)
Example #3
0
 def testInvalidCMEKConfigurationRaises(self):
   """A malformed CMEK resource name should raise CommandException."""
   malformed_cmek = (
       'projects/my-project/locations/some-location/keyRings/keyring/'
       'cryptoKeyWHOOPS-INVALID-RESOURCE-PORTION/somekey')
   with self.assertRaises(CommandException) as cm:
     CryptoKeyWrapperFromKey(malformed_cmek)
   expected_fragment = (
       'Configured encryption_key or decryption_key looked like a CMEK')
   self.assertIn(expected_fragment, cm.exception.reason)
Example #4
0
  def RunCommand(self):
    """Command entry point for the rewrite command.

    Returns:
      0 on success.

    Raises:
      CommandException: On invalid flag combinations, missing transformation
          flags, or if any object could not be rewritten.
    """
    self.continue_on_error = self.parallel_operations
    self.csek_hash_to_keywrapper = {}
    self.dest_storage_class = None
    self.no_preserve_acl = False
    self.read_args_from_stdin = False
    self.supported_transformation_flags = ['-k', '-s']
    self.transform_types = set()

    self.op_failure_count = 0
    self.boto_file_encryption_keywrapper = GetEncryptionKeyWrapper(config)
    self.boto_file_encryption_sha256 = (
        self.boto_file_encryption_keywrapper.crypto_key_sha256
        if self.boto_file_encryption_keywrapper else None)

    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-f':
          self.continue_on_error = True
        elif o == '-k':
          self.transform_types.add(_TransformTypes.CRYPTO_KEY)
        elif o == '-I':
          self.read_args_from_stdin = True
        elif o == '-O':
          self.no_preserve_acl = True
        elif o == '-r' or o == '-R':
          self.recursion_requested = True
          self.all_versions = True
        elif o == '-s':
          self.transform_types.add(_TransformTypes.STORAGE_CLASS)
          self.dest_storage_class = NormalizeStorageClass(a)

    if self.read_args_from_stdin:
      if self.args:
        raise CommandException('No arguments allowed with the -I flag.')
      url_strs = StdinIterator()
    else:
      if not self.args:
        raise CommandException('The rewrite command (without -I) expects at '
                               'least one URL.')
      url_strs = self.args

    if not self.transform_types:
      raise CommandException(
          'rewrite command requires at least one transformation flag. '
          'Currently supported transformation flags: %s' %
          self.supported_transformation_flags)

    self.preconditions = PreconditionsFromHeaders(self.headers or {})

    url_strs_generator = GenerationCheckGenerator(url_strs)

    # Convert recursive flag to flat wildcard to avoid performing multiple
    # listings.
    if self.recursion_requested:
      url_strs_generator = ConvertRecursiveToFlatWildcard(url_strs_generator)

    # Expand the source argument(s).
    name_expansion_iterator = NameExpansionIterator(
        self.command_name,
        self.debug,
        self.logger,
        self.gsutil_api,
        url_strs_generator,
        self.recursion_requested,
        project_id=self.project_id,
        continue_on_error=self.continue_on_error or self.parallel_operations,
        bucket_listing_fields=['name', 'size'])

    seek_ahead_iterator = None
    # Cannot seek ahead with stdin args, since we can only iterate them
    # once without buffering in memory.
    if not self.read_args_from_stdin:
      # Perform the same recursive-to-flat conversion on original url_strs so
      # that it is as true to the original iterator as possible.
      seek_ahead_url_strs = ConvertRecursiveToFlatWildcard(url_strs)
      seek_ahead_iterator = SeekAheadNameExpansionIterator(
          self.command_name,
          self.debug,
          self.GetSeekAheadGsutilApi(),
          seek_ahead_url_strs,
          self.recursion_requested,
          all_versions=self.all_versions,
          project_id=self.project_id)

    # Rather than have each worker repeatedly calculate the sha256 hash for each
    # decryption_key in the boto config, do this once now and cache the results.
    # Boto config keys are named decryption_key1 ... decryption_keyN.
    for key_number in range(1, MAX_DECRYPTION_KEYS + 1):
      keywrapper = CryptoKeyWrapperFromKey(
          config.get('GSUtil', 'decryption_key%s' % key_number, None))
      if keywrapper is None:
        # Stop at first attribute absence in lexicographical iteration.
        break
      if keywrapper.crypto_type == CryptoKeyType.CSEK:
        self.csek_hash_to_keywrapper[keywrapper.crypto_key_sha256] = keywrapper
    # Also include the encryption_key, since it should be used to decrypt and
    # then encrypt if the object's CSEK should remain the same.
    if self.boto_file_encryption_sha256 is not None:
      self.csek_hash_to_keywrapper[self.boto_file_encryption_sha256] = (
          self.boto_file_encryption_keywrapper)

    if self.boto_file_encryption_keywrapper is None:
      msg = '\n'.join(
          textwrap.wrap(
              'NOTE: No encryption_key was specified in the boto configuration '
              'file, so gsutil will not provide an encryption key in its rewrite '
              'API requests. This will decrypt the objects unless they are in '
              'buckets with a default KMS key set, in which case the service '
              'will automatically encrypt the rewritten objects with that key.')
      )
      print('%s\n' % msg, file=sys.stderr)

    # Perform rewrite requests in parallel (-m) mode, if requested.
    self.Apply(_RewriteFuncWrapper,
               name_expansion_iterator,
               _RewriteExceptionHandler,
               fail_on_error=(not self.continue_on_error),
               shared_attrs=['op_failure_count'],
               seek_ahead_iterator=seek_ahead_iterator)

    if self.op_failure_count:
      # Only pluralize when more than one operation failed; the previous
      # check ('s' if self.op_failure_count) was always true here.
      plural_str = 's' if self.op_failure_count > 1 else ''
      raise CommandException('%d file%s/object%s could not be rewritten.' %
                             (self.op_failure_count, plural_str, plural_str))

    return 0
Example #5
0
    def __init__(self,
                 src_url,
                 src_obj_size,
                 gsutil_api,
                 compressed_encoding=False,
                 progress_callback=None,
                 download_chunk_size=_DEFAULT_DOWNLOAD_CHUNK_SIZE,
                 decryption_key=None):
        """Initializes the daisy chain wrapper.

        Args:
          src_url: Source CloudUrl to copy from.
          src_obj_size: Size of source object.
          gsutil_api: gsutil Cloud API to use for the copy.
          compressed_encoding: If true, source object has content-encoding: gzip.
          progress_callback: Optional callback function for progress notifications
              for the download thread. Receives calls with arguments
              (bytes_transferred, total_size).
          download_chunk_size: Integer number of bytes to download per
              GetObjectMedia request. This is the upper bound of bytes that may be
              unnecessarily downloaded if there is a break in the resumable upload.
          decryption_key: Base64-encoded decryption key for the source object,
              if any.

        Raises:
          Exception: if the download thread doesn't start within 60 seconds.
        """
        # Current read position for the upload file pointer.
        self.position = 0
        # FIFO of downloaded chunks awaiting consumption by the upload side.
        self.buffer = collections.deque()

        self.bytes_buffered = 0
        # Maximum amount of bytes in memory at a time.
        self.max_buffer_size = 1024 * 1024  # 1 MiB

        self._download_chunk_size = download_chunk_size

        # We save one buffer's worth of data as a special case for boto,
        # which seeks back one buffer and rereads to compute hashes. This is
        # unnecessary because we can just compare cloud hash digests at the end,
        # but it allows this to work without modifying boto.
        self.last_position = 0
        self.last_data = None

        # Protects buffer, position, bytes_buffered, last_position, and last_data.
        self.lock = parallelism_framework_util.CreateLock()

        # Protects download_exception.
        self.download_exception_lock = parallelism_framework_util.CreateLock()

        self.src_obj_size = src_obj_size
        self.src_url = src_url
        self.compressed_encoding = compressed_encoding
        # May be None if no decryption key was supplied.
        self.decryption_tuple = CryptoKeyWrapperFromKey(decryption_key)

        # Sharing gsutil_api between the upload and download threads is safe
        # because the download thread calls only GetObjectMedia, which creates
        # a new HTTP connection independent of gsutil_api. Thus, it will not
        # share an HTTP connection with the upload.
        self.gsutil_api = gsutil_api

        # If self.download_thread dies due to an exception, it is saved here so
        # that it can also be raised in the upload thread.
        self.download_exception = None
        self.download_thread = None
        self.progress_callback = progress_callback
        self.download_started = threading.Event()
        self.stop_download = threading.Event()
        self.StartDownloadThread(progress_callback=self.progress_callback)
        # Fail fast rather than hanging forever if the thread never signals.
        if not self.download_started.wait(60):
            raise Exception(
                'Could not start download thread after 60 seconds.')
Example #6
0
  def CatUrlStrings(self,
                    url_strings,
                    show_header=False,
                    start_byte=0,
                    end_byte=None,
                    cat_out_fd=None):
    """Prints each of the url strings to stdout.

    Args:
      url_strings: String iterable.
      show_header: If true, print a header per file.
      start_byte: Starting byte of the file to print, used for constructing
                  range requests.
      end_byte: Ending byte of the file to print; used for constructing range
                requests. If this is negative, start_byte is ignored and an
                end-relative range is sent over HTTP (such as range: bytes -9).
      cat_out_fd: File descriptor to which output should be written. Defaults to
                 stdout if no file descriptor is supplied.
    Returns:
      0 on success.

    Raises:
      CommandException if no URLs can be found.
    """
    printed_one = False
    # This should refer to whatever sys.stdin refers to when this method is
    # run, not when this method is defined, so we do the initialization here
    # rather than define sys.stdin as the cat_out_fd parameter's default value.
    if cat_out_fd is None:
      cat_out_fd = sys.stdout
    # Redirect stdout to stderr so that any output other than the object
    # contents (headers, log messages) does not corrupt the catted data.
    old_stdout = sys.stdout
    sys.stdout = sys.stderr
    try:
      # '-' (or 'file://-') as the first URL means "cat stdin".
      if url_strings and url_strings[0] in ('-', 'file://-'):
        self._WriteBytesBufferedFileToFile(sys.stdin, cat_out_fd)
      else:
        for url_str in url_strings:
          did_some_work = False
          # TODO: Get only the needed fields here.
          for blr in self.command_obj.WildcardIterator(url_str).IterObjects(
              bucket_listing_fields=_CAT_BUCKET_LISTING_FIELDS):
            decryption_keywrapper = None
            # CSEK-encrypted objects expose only the key's SHA256; look up a
            # matching key from the boto config to decrypt.
            if (blr.root_object and blr.root_object.customerEncryption and
                blr.root_object.customerEncryption.keySha256):
              decryption_key = FindMatchingCSEKInBotoConfig(
                  blr.root_object.customerEncryption.keySha256, config)
              if not decryption_key:
                raise EncryptionException(
                    'Missing decryption key with SHA256 hash %s. No decryption '
                    'key matches object %s' %
                    (blr.root_object.customerEncryption.keySha256,
                     blr.url_string))
              decryption_keywrapper = CryptoKeyWrapperFromKey(decryption_key)

            did_some_work = True
            if show_header:
              if printed_one:
                print()
              print('==> %s <==' % blr)
              printed_one = True
            cat_object = blr.root_object
            storage_url = StorageUrlFromString(blr.url_string)
            if storage_url.IsCloudUrl():
              compressed_encoding = ObjectIsGzipEncoded(cat_object)
              self.command_obj.gsutil_api.GetObjectMedia(
                  cat_object.bucket,
                  cat_object.name,
                  cat_out_fd,
                  compressed_encoding=compressed_encoding,
                  start_byte=start_byte,
                  end_byte=end_byte,
                  object_size=cat_object.size,
                  generation=storage_url.generation,
                  decryption_tuple=decryption_keywrapper,
                  provider=storage_url.scheme)
            else:
              # Local file URL: stream the file contents directly.
              with open(storage_url.object_name, 'rb') as f:
                self._WriteBytesBufferedFileToFile(f, cat_out_fd)
          if not did_some_work:
            raise CommandException(NO_URLS_MATCHED_TARGET % url_str)
    finally:
      # Always restore stdout, even if an exception propagates.
      sys.stdout = old_stdout

    return 0
  def _test_rewrite_resume_or_restart(self, initial_dec_key, initial_enc_key,
                                      new_dec_key=None, new_enc_key=None):
    """Tests that the rewrite command restarts if the object's key changed.

    Args:
      initial_dec_key: Initial key the object is encrypted with, used as
          decryption key in the first rewrite call.
      initial_enc_key: Initial encryption key to rewrite the object with,
          used as encryption key in the first rewrite call.
      new_dec_key: Decryption key for the second rewrite call; if specified,
          object will be overwritten with a new encryption key in between
          the first and second rewrite calls, and this key will be used for
          the second rewrite call.
      new_enc_key: Encryption key for the second rewrite call; if specified,
          this key will be used for the second rewrite call, otherwise the
          initial key will be used.

    Returns:
      None
    """
    if self.test_api == ApiSelector.XML:
      # unittest.skip() returns a decorator; returning it from inside a test
      # silently passes instead of skipping. Raise SkipTest so the runner
      # actually records this test as skipped.
      raise unittest.SkipTest('Rewrite API is only supported in JSON.')
    bucket_uri = self.CreateBucket()
    # maxBytesPerCall must be >= 1 MiB, so create an object > 2 MiB because we
    # need 2 response from the service: 1 success, 1 failure prior to
    # completion.
    object_uri = self.CreateObject(bucket_uri=bucket_uri, object_name='foo',
                                   contents=('12'*ONE_MIB) + 'bar',
                                   prefer_json_api=True,
                                   encryption_key=initial_dec_key)
    gsutil_api = GcsJsonApi(BucketStorageUri, logging.getLogger(),
                            DiscardMessagesQueue(), self.default_provider)
    # The initial decryption key must be configured to read the object's
    # metadata, since it is CSEK-encrypted.
    with SetBotoConfigForTest(
        [('GSUtil', 'decryption_key1', initial_dec_key)]):
      src_obj_metadata = gsutil_api.GetObjectMetadata(
          object_uri.bucket_name, object_uri.object_name,
          provider=self.default_provider, fields=['bucket', 'contentType',
                                                  'etag', 'name'])
    dst_obj_metadata = src_obj_metadata
    tracker_file_name = GetRewriteTrackerFilePath(
        src_obj_metadata.bucket, src_obj_metadata.name,
        dst_obj_metadata.bucket, dst_obj_metadata.name, self.test_api)
    decryption_tuple = CryptoKeyWrapperFromKey(initial_dec_key)
    decryption_tuple2 = CryptoKeyWrapperFromKey(new_dec_key or initial_dec_key)
    encryption_tuple = CryptoKeyWrapperFromKey(initial_enc_key)
    encryption_tuple2 = CryptoKeyWrapperFromKey(new_enc_key or initial_enc_key)

    try:
      try:
        # First rewrite is halted partway through by the callback handler so
        # that a tracker file is left behind.
        gsutil_api.CopyObject(
            src_obj_metadata, dst_obj_metadata,
            progress_callback=HaltingRewriteCallbackHandler(ONE_MIB*2).call,
            max_bytes_per_call=ONE_MIB, decryption_tuple=decryption_tuple,
            encryption_tuple=encryption_tuple)
        self.fail('Expected RewriteHaltException.')
      except RewriteHaltException:
        pass

      # Tracker file should be left over.
      self.assertTrue(os.path.exists(tracker_file_name))

      if new_dec_key:
        # Recreate the object with a different encryption key.
        self.CreateObject(
            bucket_uri=bucket_uri, object_name='foo',
            contents=('12'*ONE_MIB) + 'bar', prefer_json_api=True,
            encryption_key=new_dec_key,
            gs_idempotent_generation=urigen(object_uri))

      with SetBotoConfigForTest([
          ('GSUtil', 'decryption_key1', new_dec_key or initial_dec_key)]):
        original_md5 = gsutil_api.GetObjectMetadata(
            src_obj_metadata.bucket, src_obj_metadata.name,
            fields=['customerEncryption', 'md5Hash']).md5Hash

      if new_dec_key or new_enc_key:
        # Keys changed, rewrite should be restarted.
        progress_callback = EnsureRewriteRestartCallbackHandler(ONE_MIB).call
      else:
        # Keys are the same, rewrite should be resumed.
        progress_callback = EnsureRewriteResumeCallbackHandler(ONE_MIB*2).call

      # Now resume. Callback ensures the appropriate resume/restart behavior.
      gsutil_api.CopyObject(
          src_obj_metadata, dst_obj_metadata,
          progress_callback=progress_callback, max_bytes_per_call=ONE_MIB,
          decryption_tuple=decryption_tuple2,
          encryption_tuple=encryption_tuple2)

      # Copy completed; tracker file should be deleted.
      self.assertFalse(os.path.exists(tracker_file_name))

      final_enc_key = new_enc_key or initial_enc_key

      # Verify the rewritten object decrypts to the same content as the
      # source by comparing md5 hashes under the final encryption key.
      with SetBotoConfigForTest([
          ('GSUtil', 'encryption_key', final_enc_key)]):
        self.assertEqual(
            original_md5,
            gsutil_api.GetObjectMetadata(dst_obj_metadata.bucket,
                                         dst_obj_metadata.name,
                                         fields=['customerEncryption',
                                                 'md5Hash']).md5Hash,
            'Error: Rewritten object\'s hash doesn\'t match source object.')
    finally:
      # Clean up if something went wrong.
      DeleteTrackerFile(tracker_file_name)