def testInvalidCSEKConfigurationRaises(self):
  invalid_key = 'aP7KbmxLqDw1SWHeKvlfKOVgNRNNZc8L2sFz8ybLN==='
  with self.assertRaises(CommandException) as cm:
    CryptoKeyWrapperFromKey(invalid_key)
  self.assertIn(
      'Configured encryption_key or decryption_key looked like a CSEK',
      cm.exception.reason)

def testInvalidCMEKConfigurationRaises(self):
  invalid_key = (
      'projects/my-project/locations/some-location/keyRings/keyring/'
      'cryptoKeyWHOOPS-INVALID-RESOURCE-PORTION/somekey')
  with self.assertRaises(CommandException) as cm:
    CryptoKeyWrapperFromKey(invalid_key)
  self.assertIn(
      'Configured encryption_key or decryption_key looked like a CMEK',
      cm.exception.reason)

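
# Illustrative sketch (not part of the original suite): the two tests above
# depend on CryptoKeyWrapperFromKey guessing a key's flavor from its shape
# before validating it, which is why the error messages say a key "looked
# like" a CSEK or CMEK. The helper below is a hypothetical approximation of
# that shape check, not gsutil's implementation.
def _example_guess_key_type(key):
  # Cloud KMS resource names identify CMEKs; full validation would check the
  # complete projects/.../locations/.../keyRings/.../cryptoKeys/... pattern.
  if key.startswith('projects/'):
    return 'CMEK'
  # A CSEK is a base64 encoding of a 256-bit AES key, 44 characters when
  # encoded; full validation would also base64-decode it and check the raw
  # length.
  if len(key) == 44:
    return 'CSEK'
  return None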
def _test_rewrite_resume_or_restart(self, initial_dec_key, initial_enc_key,
                                    new_dec_key=None, new_enc_key=None):
  """Tests that the rewrite command restarts if the object's key changed.

  Args:
    initial_dec_key: Initial key the object is encrypted with, used as
        decryption key in the first rewrite call.
    initial_enc_key: Initial encryption key to rewrite the object with,
        used as encryption key in the first rewrite call.
    new_dec_key: Decryption key for the second rewrite call; if specified,
        object will be overwritten with a new encryption key in between
        the first and second rewrite calls, and this key will be used for
        the second rewrite call.
    new_enc_key: Encryption key for the second rewrite call; if specified,
        this key will be used for the second rewrite call, otherwise the
        initial key will be used.

  Returns:
    None
  """
  if self.test_api == ApiSelector.XML:
    return unittest.skip('Rewrite API is only supported in JSON.')
  bucket_uri = self.CreateBucket()
  # maxBytesPerCall must be >= 1 MiB, so create an object > 2 MiB because we
  # need 2 responses from the service: 1 success, 1 failure prior to
  # completion.
  object_uri = self.CreateObject(bucket_uri=bucket_uri,
                                 object_name='foo',
                                 contents=('12' * ONE_MIB) + 'bar',
                                 prefer_json_api=True,
                                 encryption_key=initial_dec_key)
  gsutil_api = GcsJsonApi(BucketStorageUri, logging.getLogger(),
                          DiscardMessagesQueue(), self.default_provider)
  with SetBotoConfigForTest([('GSUtil', 'decryption_key1',
                              initial_dec_key)]):
    src_obj_metadata = gsutil_api.GetObjectMetadata(
        object_uri.bucket_name, object_uri.object_name,
        provider=self.default_provider,
        fields=['bucket', 'contentType', 'etag', 'name'])
  dst_obj_metadata = src_obj_metadata
  tracker_file_name = GetRewriteTrackerFilePath(src_obj_metadata.bucket,
                                                src_obj_metadata.name,
                                                dst_obj_metadata.bucket,
                                                dst_obj_metadata.name,
                                                self.test_api)
  decryption_tuple = CryptoKeyWrapperFromKey(initial_dec_key)
  decryption_tuple2 = CryptoKeyWrapperFromKey(new_dec_key or initial_dec_key)
  encryption_tuple = CryptoKeyWrapperFromKey(initial_enc_key)
  encryption_tuple2 = CryptoKeyWrapperFromKey(new_enc_key or initial_enc_key)

  try:
    try:
      gsutil_api.CopyObject(
          src_obj_metadata, dst_obj_metadata,
          progress_callback=HaltingRewriteCallbackHandler(ONE_MIB * 2).call,
          max_bytes_per_call=ONE_MIB,
          decryption_tuple=decryption_tuple,
          encryption_tuple=encryption_tuple)
      self.fail('Expected RewriteHaltException.')
    except RewriteHaltException:
      pass

    # Tracker file should be left over.
    self.assertTrue(os.path.exists(tracker_file_name))

    if new_dec_key:
      # Recreate the object with a different encryption key.
      self.CreateObject(bucket_uri=bucket_uri,
                        object_name='foo',
                        contents=('12' * ONE_MIB) + 'bar',
                        prefer_json_api=True,
                        encryption_key=new_dec_key,
                        gs_idempotent_generation=urigen(object_uri))

    with SetBotoConfigForTest([('GSUtil', 'decryption_key1',
                                new_dec_key or initial_dec_key)]):
      original_md5 = gsutil_api.GetObjectMetadata(
          src_obj_metadata.bucket, src_obj_metadata.name,
          fields=['customerEncryption', 'md5Hash']).md5Hash

    if new_dec_key or new_enc_key:
      # Keys changed, rewrite should be restarted.
      progress_callback = EnsureRewriteRestartCallbackHandler(ONE_MIB).call
    else:
      # Keys are the same, rewrite should be resumed.
      progress_callback = EnsureRewriteResumeCallbackHandler(ONE_MIB * 2).call

    # Now resume. Callback ensures the appropriate resume/restart behavior.
    gsutil_api.CopyObject(src_obj_metadata, dst_obj_metadata,
                          progress_callback=progress_callback,
                          max_bytes_per_call=ONE_MIB,
                          decryption_tuple=decryption_tuple2,
                          encryption_tuple=encryption_tuple2)

    # Copy completed; tracker file should be deleted.
    self.assertFalse(os.path.exists(tracker_file_name))

    final_enc_key = new_enc_key or initial_enc_key
    with SetBotoConfigForTest([('GSUtil', 'encryption_key',
                                final_enc_key)]):
      self.assertEqual(
          original_md5,
          gsutil_api.GetObjectMetadata(dst_obj_metadata.bucket,
                                       dst_obj_metadata.name,
                                       fields=['customerEncryption',
                                               'md5Hash']).md5Hash,
          'Error: Rewritten object\'s hash doesn\'t match source object.')
  finally:
    # Clean up if something went wrong.
    DeleteTrackerFile(tracker_file_name)

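
# Illustrative sketch (hypothetical test methods, not from the original
# suite): concrete tests would exercise the helper above with different key
# combinations. TEST_ENCRYPTION_KEY1 and TEST_ENCRYPTION_KEY2 are assumed
# base64-encoded AES256 key constants.
def testRewriteResumesWithSameKeys(self):
  # Identical keys across both calls: the second CopyObject should resume
  # from the tracker file.
  self._test_rewrite_resume_or_restart(TEST_ENCRYPTION_KEY1,
                                       TEST_ENCRYPTION_KEY1)

def testRewriteRestartsWithNewEncryptionKey(self):
  # A different destination key invalidates the tracker, so the second
  # CopyObject should restart from byte 0.
  self._test_rewrite_resume_or_restart(TEST_ENCRYPTION_KEY1,
                                       TEST_ENCRYPTION_KEY1,
                                       new_enc_key=TEST_ENCRYPTION_KEY2)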
def CatUrlStrings(self, url_strings, show_header=False, start_byte=0,
                  end_byte=None, cat_out_fd=None):
  """Prints each of the url strings to stdout.

  Args:
    url_strings: String iterable.
    show_header: If true, print a header per file.
    start_byte: Starting byte of the file to print, used for constructing
                range requests.
    end_byte: Ending byte of the file to print; used for constructing range
              requests. If this is negative, the start_byte is ignored and
              an end range is sent over HTTP (such as Range: bytes=-9).
    cat_out_fd: File descriptor to which output should be written. Defaults
                to stdout if no file descriptor is supplied.

  Returns:
    0 on success.

  Raises:
    CommandException if no URLs can be found.
  """
  printed_one = False
  # This should refer to whatever sys.stdin refers to when this method is
  # run, not when this method is defined, so we do the initialization here
  # rather than define sys.stdin as the cat_out_fd parameter's default value.
  if cat_out_fd is None:
    cat_out_fd = sys.stdout
  # Redirect sys.stdout to stderr so that everything other than the object
  # contents goes to stderr.
  old_stdout = sys.stdout
  sys.stdout = sys.stderr
  try:
    if url_strings and url_strings[0] in ('-', 'file://-'):
      self._WriteBytesBufferedFileToFile(sys.stdin, cat_out_fd)
    else:
      for url_str in url_strings:
        did_some_work = False
        # TODO: Get only the needed fields here.
        for blr in self.command_obj.WildcardIterator(url_str).IterObjects(
            bucket_listing_fields=_CAT_BUCKET_LISTING_FIELDS):
          decryption_keywrapper = None
          if (blr.root_object and blr.root_object.customerEncryption and
              blr.root_object.customerEncryption.keySha256):
            decryption_key = FindMatchingCSEKInBotoConfig(
                blr.root_object.customerEncryption.keySha256, config)
            if not decryption_key:
              raise EncryptionException(
                  'Missing decryption key with SHA256 hash %s. No decryption '
                  'key matches object %s' %
                  (blr.root_object.customerEncryption.keySha256,
                   blr.url_string))
            decryption_keywrapper = CryptoKeyWrapperFromKey(decryption_key)

          did_some_work = True
          if show_header:
            if printed_one:
              print
            print '==> %s <==' % blr
            printed_one = True
          cat_object = blr.root_object
          storage_url = StorageUrlFromString(blr.url_string)
          if storage_url.IsCloudUrl():
            compressed_encoding = ObjectIsGzipEncoded(cat_object)
            self.command_obj.gsutil_api.GetObjectMedia(
                cat_object.bucket, cat_object.name, cat_out_fd,
                compressed_encoding=compressed_encoding,
                start_byte=start_byte, end_byte=end_byte,
                object_size=cat_object.size,
                generation=storage_url.generation,
                decryption_tuple=decryption_keywrapper,
                provider=storage_url.scheme)
          else:
            with open(storage_url.object_name, 'rb') as f:
              self._WriteBytesBufferedFileToFile(f, cat_out_fd)
        if not did_some_work:
          raise CommandException(NO_URLS_MATCHED_TARGET % url_str)
  finally:
    sys.stdout = old_stdout
  return 0

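
# Illustrative usage sketch (hypothetical caller, not part of the original
# helper): printing the final 9 bytes of each matched object. Per the
# docstring above, a negative end_byte ignores start_byte and issues an HTTP
# suffix range request (Range: bytes=-9). `cat_helper` is assumed to be an
# instance of the class defining CatUrlStrings, bound to a command object.
def _example_cat_tail(cat_helper):
  return cat_helper.CatUrlStrings(['gs://my-bucket/*.log'],
                                  show_header=True,
                                  end_byte=-9)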
def RunCommand(self):
  """Command entry point for the rewrite command."""
  self.continue_on_error = self.parallel_operations
  self.csek_hash_to_keywrapper = {}
  self.dest_storage_class = None
  self.no_preserve_acl = False
  self.read_args_from_stdin = False
  self.supported_transformation_flags = ['-k', '-s']
  self.transform_types = set()

  self.op_failure_count = 0
  self.boto_file_encryption_keywrapper = GetEncryptionKeyWrapper(config)
  self.boto_file_encryption_sha256 = (
      self.boto_file_encryption_keywrapper.crypto_key_sha256
      if self.boto_file_encryption_keywrapper else None)

  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-f':
        self.continue_on_error = True
      elif o == '-k':
        self.transform_types.add(_TransformTypes.CRYPTO_KEY)
      elif o == '-I':
        self.read_args_from_stdin = True
      elif o == '-O':
        self.no_preserve_acl = True
      elif o == '-r' or o == '-R':
        self.recursion_requested = True
        self.all_versions = True
      elif o == '-s':
        self.transform_types.add(_TransformTypes.STORAGE_CLASS)
        self.dest_storage_class = NormalizeStorageClass(a)

  if self.read_args_from_stdin:
    if self.args:
      raise CommandException('No arguments allowed with the -I flag.')
    url_strs = StdinIterator()
  else:
    if not self.args:
      raise CommandException('The rewrite command (without -I) expects at '
                             'least one URL.')
    url_strs = self.args

  if not self.transform_types:
    raise CommandException(
        'rewrite command requires at least one transformation flag. '
        'Currently supported transformation flags: %s' %
        self.supported_transformation_flags)

  self.preconditions = PreconditionsFromHeaders(self.headers or {})

  url_strs_generator = GenerationCheckGenerator(url_strs)

  # Convert recursive flag to flat wildcard to avoid performing multiple
  # listings.
  if self.recursion_requested:
    url_strs_generator = ConvertRecursiveToFlatWildcard(url_strs_generator)

  # Expand the source argument(s).
  name_expansion_iterator = NameExpansionIterator(
      self.command_name, self.debug, self.logger, self.gsutil_api,
      url_strs_generator, self.recursion_requested,
      project_id=self.project_id,
      continue_on_error=self.continue_on_error or self.parallel_operations,
      bucket_listing_fields=['name', 'size'])

  seek_ahead_iterator = None
  # Cannot seek ahead with stdin args, since we can only iterate them
  # once without buffering in memory.
  if not self.read_args_from_stdin:
    # Perform the same recursive-to-flat conversion on original url_strs so
    # that it is as true to the original iterator as possible.
    seek_ahead_url_strs = ConvertRecursiveToFlatWildcard(url_strs)
    seek_ahead_iterator = SeekAheadNameExpansionIterator(
        self.command_name, self.debug, self.GetSeekAheadGsutilApi(),
        seek_ahead_url_strs, self.recursion_requested,
        all_versions=self.all_versions, project_id=self.project_id)

  # Rather than have each worker repeatedly calculate the sha256 hash for
  # each decryption_key in the boto config, do this once now and cache the
  # results.
  for i in range(0, MAX_DECRYPTION_KEYS):
    key_number = i + 1
    keywrapper = CryptoKeyWrapperFromKey(
        config.get('GSUtil', 'decryption_key%s' % str(key_number), None))
    if keywrapper is None:
      # Stop at the first missing decryption_key; keys are expected to be
      # numbered consecutively starting at decryption_key1.
      break
    if keywrapper.crypto_type == CryptoKeyType.CSEK:
      self.csek_hash_to_keywrapper[keywrapper.crypto_key_sha256] = keywrapper
  # Also include the encryption_key, since it should be used to decrypt and
  # then encrypt if the object's CSEK should remain the same.
  if self.boto_file_encryption_sha256 is not None:
    self.csek_hash_to_keywrapper[self.boto_file_encryption_sha256] = (
        self.boto_file_encryption_keywrapper)

  # Perform rewrite requests in parallel (-m) mode, if requested.
  self.Apply(_RewriteFuncWrapper, name_expansion_iterator,
             _RewriteExceptionHandler,
             fail_on_error=(not self.continue_on_error),
             shared_attrs=['op_failure_count'],
             seek_ahead_iterator=seek_ahead_iterator)

  if self.op_failure_count:
    plural_str = 's' if self.op_failure_count > 1 else ''
    raise CommandException('%d file%s/object%s could not be rewritten.' %
                           (self.op_failure_count, plural_str, plural_str))

  return 0

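
# Illustrative sketch: the decryption-key loop above reads consecutively
# numbered CSEKs from the [GSUtil] section of the user's boto config. A
# hypothetical .boto stanza might look like this (placeholder values, not
# real keys):
#
#   [GSUtil]
#   encryption_key = <base64-encoded AES256 key used for writes>
#   decryption_key1 = <base64-encoded AES256 key>
#   decryption_key2 = <base64-encoded AES256 key>
#
# Numbering starts at decryption_key1 and must be gapless, since the loop
# breaks at the first missing key.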
def __init__(self, src_url, src_obj_size, gsutil_api,
             compressed_encoding=False, progress_callback=None,
             download_chunk_size=_DEFAULT_DOWNLOAD_CHUNK_SIZE,
             decryption_key=None):
  """Initializes the daisy chain wrapper.

  Args:
    src_url: Source CloudUrl to copy from.
    src_obj_size: Size of source object.
    gsutil_api: gsutil Cloud API to use for the copy.
    compressed_encoding: If true, source object has content-encoding: gzip.
    progress_callback: Optional callback function for progress
        notifications for the download thread. Receives calls with
        arguments (bytes_transferred, total_size).
    download_chunk_size: Integer number of bytes to download per
        GetObjectMedia request. This is the upper bound of bytes that may
        be unnecessarily downloaded if there is a break in the resumable
        upload.
    decryption_key: Base64-encoded decryption key for the source object,
        if any.
  """
  # Current read position for the upload file pointer.
  self.position = 0
  self.buffer = deque()

  self.bytes_buffered = 0
  # Maximum number of bytes to hold in memory at a time.
  self.max_buffer_size = 1024 * 1024  # 1 MiB

  self._download_chunk_size = download_chunk_size

  # We save one buffer's worth of data as a special case for boto,
  # which seeks back one buffer and rereads to compute hashes. This is
  # unnecessary because we can just compare cloud hash digests at the end,
  # but it allows this to work without modifying boto.
  self.last_position = 0
  self.last_data = None

  # Protects buffer, position, bytes_buffered, last_position, and last_data.
  self.lock = CreateLock()

  # Protects download_exception.
  self.download_exception_lock = CreateLock()

  self.src_obj_size = src_obj_size
  self.src_url = src_url
  self.compressed_encoding = compressed_encoding
  self.decryption_tuple = CryptoKeyWrapperFromKey(decryption_key)

  # It is safe to share gsutil_api between the upload and download threads
  # because the download thread calls only GetObjectMedia, which creates a
  # new HTTP connection independent of gsutil_api. Thus, it will not share
  # an HTTP connection with the upload.
  self.gsutil_api = gsutil_api

  # If self.download_thread dies due to an exception, it is saved here so
  # that it can also be raised in the upload thread.
  self.download_exception = None
  self.download_thread = None
  self.progress_callback = progress_callback
  self.download_started = threading.Event()
  self.stop_download = threading.Event()
  self.StartDownloadThread(progress_callback=self.progress_callback)
  if not self.download_started.wait(60):
    raise Exception('Could not start download thread after 60 seconds.')

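
# Illustrative usage sketch (hypothetical, simplified): daisy-chain copies
# hand this wrapper to the upload as its file-like source, streaming bytes
# from the download thread through the bounded in-memory buffer without
# staging to disk. dst_obj_metadata is an assumed apitools Object, and the
# UploadObject call is sketched from memory of the surrounding copy code,
# not quoted from it.
def _example_daisy_chain_copy(gsutil_api, src_url, src_obj_size,
                              dst_obj_metadata, decryption_key=None):
  upload_fp = DaisyChainWrapper(src_url, src_obj_size, gsutil_api,
                                decryption_key=decryption_key)
  return gsutil_api.UploadObject(upload_fp, dst_obj_metadata,
                                 size=src_obj_size, provider='gs')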
def CreateObjectJson(self, contents, bucket_name=None, object_name=None,
                     encryption_key=None, mtime=None, storage_class=None,
                     gs_idempotent_generation=None, kms_key_name=None):
  """Creates a test object (GCS provider only) using the JSON API.

  Args:
    contents: The contents to write to the object.
    bucket_name: Name of bucket to place the object in. If not specified,
        a new temporary bucket is created.
    object_name: The name to use for the object. If not specified, a
        temporary test object name is constructed.
    encryption_key: AES256 encryption key to use when creating the object,
        if any.
    mtime: The modification time of the file in POSIX time (seconds since
        UTC 1970-01-01). If not specified, this defaults to the current
        system time.
    storage_class: String representing the storage class to use for the
        object.
    gs_idempotent_generation: For use when overwriting an object for which
        you know the previously uploaded generation. Create GCS object
        idempotently by supplying this generation number as a precondition
        and assuming the current object is correct on precondition failure.
        Defaults to 0 (new object); to disable, set to None.
    kms_key_name: Fully-qualified name of the KMS key that should be used
        to encrypt the object. Note that this is currently only valid for
        'gs' objects.

  Returns:
    An apitools Object for the created object.
  """
  bucket_name = bucket_name or self.CreateBucketJson().name
  object_name = object_name or self.MakeTempName('obj')
  preconditions = Preconditions(gen_match=gs_idempotent_generation)
  custom_metadata = apitools_messages.Object.MetadataValue(
      additionalProperties=[])
  if mtime is not None:
    CreateCustomMetadata({MTIME_ATTR: mtime}, custom_metadata)
  object_metadata = apitools_messages.Object(
      name=object_name,
      metadata=custom_metadata,
      bucket=bucket_name,
      contentType='application/octet-stream',
      storageClass=storage_class,
      kmsKeyName=kms_key_name)
  encryption_keywrapper = CryptoKeyWrapperFromKey(encryption_key)
  try:
    return self.json_api.UploadObject(cStringIO.StringIO(contents),
                                      object_metadata, provider='gs',
                                      encryption_tuple=encryption_keywrapper,
                                      preconditions=preconditions)
  except PreconditionException:
    if gs_idempotent_generation is None:
      raise
    with SetBotoConfigForTest([('GSUtil', 'decryption_key1',
                                encryption_key)]):
      return self.json_api.GetObjectMetadata(bucket_name, object_name)
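
# Illustrative usage sketch (hypothetical test code): create a
# CSEK-encrypted object, then overwrite it idempotently by passing the
# generation from the first call as a precondition. TEST_ENCRYPTION_KEY1 is
# an assumed base64-encoded AES256 key constant.
def _example_create_then_overwrite(self):
  obj = self.CreateObjectJson('foo', encryption_key=TEST_ENCRYPTION_KEY1)
  # Succeeds only if the live generation still matches; on precondition
  # failure, CreateObjectJson returns the existing object's metadata.
  return self.CreateObjectJson('foo2',
                               bucket_name=obj.bucket,
                               object_name=obj.name,
                               encryption_key=TEST_ENCRYPTION_KEY1,
                               gs_idempotent_generation=obj.generation)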