def RunCommand(self):
  """Command entry point for the hash command.

  Walks every argument, requires each one to be a file URL, computes the
  requested hashes for each matched file and prints them.

  Returns:
    0 when at least one file was matched.

  Raises:
    CommandException: if an argument is not a file URL, or if none of the
        arguments matched a file.
  """
  parsed_opts = self._ParseOpts(self.sub_opts, self.logger)
  (calc_crc32c, calc_md5, format_func, output_format) = parsed_opts

  found_any = False
  for url_str in self.args:
    # This version of the command only accepts file URLs.
    if not StorageUrlFromString(url_str).IsFileUrl():
      raise CommandException('"hash" command requires a file URL')

    for file_ref in self.WildcardIterator(url_str).IterObjects():
      found_any = True
      path = file_ref.storage_url.object_name
      size_in_bytes = os.path.getsize(path)

      # Progress is reported through the logger-backed callback handler.
      progress_handler = FileProgressCallbackHandler(
          ConstructAnnounceText('Hashing', path), self.logger)
      callback_processor = ProgressCallbackWithBackoff(
          size_in_bytes, progress_handler.call)

      hash_dict = self._GetHashClassesFromArgs(calc_crc32c, calc_md5)
      with open(path, 'rb') as fp:
        CalculateHashesFromContents(
            fp, hash_dict, callback_processor=callback_processor)

      # A single parenthesised argument prints identically under the
      # Python 2 print statement.
      print('Hashes [%s] for %s:' % (output_format, path))
      for name, digest in hash_dict.iteritems():
        print('\tHash (%s):\t\t%s' % (name, format_func(digest)))

  if not found_any:
    raise CommandException('No files matched')

  return 0
def RunCommand(self):
  """Command entry point for the hash command.

  For file URLs the requested hashes are computed from the file contents;
  start/finish FileMessages and progress callbacks are posted to the status
  queue. For any other URL the md5Hash/crc32c values carried by the listed
  object metadata are printed instead; objects with neither value are
  skipped with a warning.

  Returns:
    0 when at least one file or object was matched.

  Raises:
    CommandException: if none of the arguments matched anything.
  """
  (calc_crc32c, calc_md5, format_func, cloud_format_func, output_format) = (
      self._ParseOpts(self.sub_opts, self.logger))

  matched_one = False
  for url_str in self.args:
    for file_ref in self.WildcardIterator(url_str).IterObjects(
        bucket_listing_fields=['crc32c', 'md5Hash', 'customerEncryption',
                               'size']):
      matched_one = True
      # Parse the URL string once per match and reuse the result, instead of
      # re-parsing it for every message and every printed digest.
      url = StorageUrlFromString(url_str)
      is_file_url = url.IsFileUrl()
      file_name = file_ref.storage_url.object_name

      if is_file_url:
        file_size = os.path.getsize(file_name)
        # Message indicating beginning of the hashing operation.
        self.gsutil_api.status_queue.put(
            FileMessage(url, None, time.time(), size=file_size,
                        finished=False, message_type=FileMessage.FILE_HASH))
        callback_processor = ProgressCallbackWithTimeout(
            file_size,
            FileProgressCallbackHandler(
                self.gsutil_api.status_queue, src_url=url,
                operation_name='Hashing').call)
        hash_dict = self._GetHashClassesFromArgs(calc_crc32c, calc_md5)
        with open(file_name, 'rb') as fp:
          CalculateHashesFromContents(fp, hash_dict,
                                      callback_processor=callback_processor)
        # Message indicating end of the hashing operation.
        self.gsutil_api.status_queue.put(
            FileMessage(url, None, time.time(), size=file_size,
                        finished=True, message_type=FileMessage.FILE_HASH))
      else:
        # Non-file URL: report the hashes stored in the object metadata.
        obj_metadata = file_ref.root_object
        hash_dict = {}
        if obj_metadata.md5Hash is not None:
          hash_dict['md5'] = obj_metadata.md5Hash
        if obj_metadata.crc32c is not None:
          hash_dict['crc32c'] = obj_metadata.crc32c
        if not hash_dict:
          # Logger.warn is a deprecated alias; use warning().
          logging.getLogger().warning('No hashes present for %s', url_str)
          continue

      # File digests and metadata digests use different formatters.
      digest_formatter = format_func if is_file_url else cloud_format_func
      print('Hashes [%s] for %s:' % (output_format, file_name))
      for name, digest in hash_dict.items():
        print('\tHash (%s):\t\t%s' % (name, digest_formatter(digest)))

  if not matched_one:
    raise CommandException('No files matched')

  PutToQueueWithTimeout(self.gsutil_api.status_queue,
                        FinalMessage(time.time()))
  return 0
def RewriteFunc(self, name_expansion_result, thread_state=None):
  """Rewrites one cloud object, applying the requested transformations.

  Transformations that would not change the object (same storage class, same
  encryption key) are dropped for this object only; if none remain, the
  object is skipped.

  Args:
    name_expansion_result: Name expansion result whose expanded_storage_url
        identifies the object to rewrite.
    thread_state: Optional state forwarded to GetCloudApiInstance.

  Raises:
    CommandException: if ACLs are being preserved but the source metadata
        carries no ACL.
    EncryptionException: if the configured encryption key differs from the
        key of the source object and the CRYPTO_KEY transform was not
        requested, or if no decryption key matches an encrypted source.
  """
  gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)
  transform_url = name_expansion_result.expanded_storage_url
  # Make a local copy of the requested transformations for each thread. As
  # a redundant transformation for one object might not be redundant for
  # another, we wouldn't want to remove it from the transform_types set that
  # all threads share.
  transforms_to_perform = set(self.transform_types)

  self.CheckProvider(transform_url)

  # Get all fields so that we can ensure that the target metadata is
  # specified correctly.
  src_metadata = gsutil_api.GetObjectMetadata(
      transform_url.bucket_name, transform_url.object_name,
      generation=transform_url.generation, provider=transform_url.scheme)

  if self.no_preserve_acl:
    # Leave ACL unchanged.
    src_metadata.acl = []
  elif not src_metadata.acl:
    raise CommandException(
        'No OWNER permission found for object %s. OWNER permission is '
        'required for rewriting objects, (otherwise their ACLs would be '
        'reset).' % transform_url)

  # Note: If other transform types are added, they must ensure that the
  # encryption key configuration matches the boto configuration, because
  # gsutil maintains an invariant that all objects it writes use the
  # encryption_key value (including decrypting if no key is present).
  src_encryption_sha256 = None
  if (src_metadata.customerEncryption and
      src_metadata.customerEncryption.keySha256):
    src_encryption_sha256 = src_metadata.customerEncryption.keySha256

  should_encrypt_target = self.boto_file_encryption_sha256 is not None
  source_was_encrypted = src_encryption_sha256 is not None
  using_same_encryption_key_value = (
      src_encryption_sha256 == self.boto_file_encryption_sha256)

  # Prevent accidental key rotation.
  if (_TransformTypes.CRYPTO_KEY not in transforms_to_perform and
      not using_same_encryption_key_value):
    raise EncryptionException(
        'The "-k" flag was not passed to the rewrite command, but the '
        'encryption_key value in your boto config file did not match the key '
        'used to encrypt the object "%s" (hash: %s). To encrypt the object '
        'using a different key, you must specify the "-k" flag.' %
        (transform_url, src_encryption_sha256))

  # Remove any redundant changes.

  # STORAGE_CLASS transform should be skipped if the target storage class
  # matches the existing storage class.
  if (_TransformTypes.STORAGE_CLASS in transforms_to_perform and
      self.dest_storage_class == NormalizeStorageClass(
          src_metadata.storageClass)):
    transforms_to_perform.remove(_TransformTypes.STORAGE_CLASS)
    self.logger.info(
        'Redundant transform: %s already had storage class of '
        '%s.' % (transform_url, src_metadata.storageClass))

  # CRYPTO_KEY transform should be skipped if we're using the same encryption
  # key (if any) that was used to encrypt the source.
  if (_TransformTypes.CRYPTO_KEY in transforms_to_perform and
      using_same_encryption_key_value):
    if self.boto_file_encryption_sha256 is None:
      log_msg = '%s is already decrypted.' % transform_url
    else:
      log_msg = '%s already has current encryption key.' % transform_url
    transforms_to_perform.remove(_TransformTypes.CRYPTO_KEY)
    self.logger.info('Redundant transform: %s' % log_msg)

  if not transforms_to_perform:
    self.logger.info(
        'Skipping %s, all transformations were redundant.' % transform_url)
    return

  # Make a deep copy of the source metadata.
  dst_metadata = encoding.PyValueToMessage(
      apitools_messages.Object, encoding.MessageToPyValue(src_metadata))

  # Remove some unnecessary/invalid fields.
  dst_metadata.customerEncryption = None
  dst_metadata.generation = None
  # Service has problems if we supply an ID, but it is responsible for
  # generating one, so it is not necessary to include it here.
  dst_metadata.id = None
  decryption_tuple = None
  # Use a generic operation name by default - this can be altered below for
  # specific transformations (encryption changes, etc.).
  operation_name = 'Rewriting'

  if source_was_encrypted:
    decryption_key = FindMatchingCryptoKey(src_encryption_sha256)
    if not decryption_key:
      raise EncryptionException(
          'Missing decryption key with SHA256 hash %s. No decryption key '
          'matches object %s' % (src_encryption_sha256, transform_url))
    decryption_tuple = CryptoTupleFromKey(decryption_key)

  if _TransformTypes.CRYPTO_KEY in transforms_to_perform:
    if not source_was_encrypted:
      operation_name = 'Encrypting'
    elif not should_encrypt_target:
      operation_name = 'Decrypting'
    else:
      operation_name = 'Rotating'

  if _TransformTypes.STORAGE_CLASS in transforms_to_perform:
    dst_metadata.storageClass = self.dest_storage_class

  # TODO: Remove this call (used to verify tests) and make it processed by
  # the UIThread.
  sys.stderr.write(
      _ConstructAnnounceText(operation_name, transform_url.url_string))
  # Flush so the announce text is emitted before the copy starts, matching
  # the newer RewriteFunc implementation.
  sys.stderr.flush()

  # Message indicating beginning of operation.
  gsutil_api.status_queue.put(
      FileMessage(transform_url, None, time.time(), finished=False,
                  size=src_metadata.size,
                  message_type=FileMessage.FILE_REWRITE))

  progress_callback = FileProgressCallbackHandler(
      gsutil_api.status_queue, src_url=transform_url,
      operation_name=operation_name).call

  gsutil_api.CopyObject(src_metadata, dst_metadata,
                        src_generation=transform_url.generation,
                        preconditions=self.preconditions,
                        progress_callback=progress_callback,
                        decryption_tuple=decryption_tuple,
                        encryption_tuple=self.boto_file_encryption_tuple,
                        provider=transform_url.scheme, fields=[])

  # Message indicating end of operation.
  gsutil_api.status_queue.put(
      FileMessage(transform_url, None, time.time(), finished=True,
                  size=src_metadata.size,
                  message_type=FileMessage.FILE_REWRITE))
def CryptoRewrite(self, transform_url, gsutil_api):
  """Rewrites a cloud object so its encryption matches the current config.

  Nothing is rewritten when the key hash of the object already equals the
  currently configured key hash (including the case where both are absent).

  Args:
    transform_url: CloudUrl to rewrite.
    gsutil_api: gsutil CloudApi instance for making API calls.
  """
  # All fields are requested so that the target metadata can be specified
  # correctly.
  src_metadata = gsutil_api.GetObjectMetadata(
      transform_url.bucket_name, transform_url.object_name,
      generation=transform_url.generation, provider=transform_url.scheme)

  if self.no_preserve_acl:
    # Leave ACL unchanged.
    src_metadata.acl = []
  elif not src_metadata.acl:
    raise CommandException(
        'No OWNER permission found for object %s. OWNER permission is '
        'required for rewriting objects, (otherwise their ACLs would be '
        'reset).' % transform_url)

  source_key_hash = None
  if (src_metadata.customerEncryption and
      src_metadata.customerEncryption.keySha256):
    source_key_hash = src_metadata.customerEncryption.keySha256

  # Guard clause: the object is already in the desired encryption state.
  if source_key_hash == self.current_encryption_sha256:
    if self.current_encryption_sha256 is None:
      self.logger.info('Skipping %s, already decrypted' % transform_url)
    else:
      self.logger.info(
          'Skipping %s, already has current encryption key' % transform_url)
    return

  # Deep-copy the source metadata by round-tripping it through a Python
  # value.
  dst_metadata = encoding.PyValueToMessage(
      apitools_messages.Object, encoding.MessageToPyValue(src_metadata))

  # Strip fields that are unnecessary or invalid on the destination.
  dst_metadata.customerEncryption = None
  dst_metadata.generation = None
  # Service has problems if we supply an ID, but it is responsible for
  # generating one, so it is not necessary to include it here.
  dst_metadata.id = None

  decryption_tuple = None
  if source_key_hash is None:
    announce_text = 'Encrypting'
  else:
    # The source is encrypted, so a matching decryption key is required.
    decryption_key = FindMatchingCryptoKey(source_key_hash)
    if not decryption_key:
      raise EncryptionException(
          'Missing decryption key with SHA256 hash %s. No decryption key '
          'matches object %s' % (source_key_hash, transform_url))
    decryption_tuple = CryptoTupleFromKey(decryption_key)
    if self.current_encryption_sha256 is None:
      announce_text = 'Decrypting'
    else:
      announce_text = 'Rotating'

  announcement = ConstructAnnounceText(announce_text,
                                       transform_url.url_string)
  progress_callback = FileProgressCallbackHandler(
      announcement, gsutil_api.status_queue).call

  gsutil_api.CopyObject(
      src_metadata, dst_metadata,
      src_generation=transform_url.generation,
      preconditions=self.preconditions,
      progress_callback=progress_callback,
      decryption_tuple=decryption_tuple,
      encryption_tuple=self.current_encryption_tuple,
      provider=transform_url.scheme, fields=[])
def RewriteFunc(self, name_expansion_result, thread_state=None):
  """Rewrites one cloud object, applying the requested transformations.

  The rewrite is skipped only when every requested transformation is
  redundant for this object.

  Args:
    name_expansion_result: Name expansion result whose expanded_storage_url
        identifies the object to rewrite.
    thread_state: Optional state forwarded to GetCloudApiInstance.

  Raises:
    CommandException: if ACLs are being preserved but the source metadata
        carries no ACL.
    EncryptionException: if the configured encryption differs from that of
        the source object and the CRYPTO_KEY transform was not requested, or
        if no known CSEK key matches the key hash of the source object.
  """
  gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)
  transform_url = name_expansion_result.expanded_storage_url

  self.CheckProvider(transform_url)

  # Fetch all fields so the target metadata can be specified correctly.
  src_metadata = gsutil_api.GetObjectMetadata(
      transform_url.bucket_name, transform_url.object_name,
      generation=transform_url.generation, provider=transform_url.scheme)

  if self.no_preserve_acl:
    # Leave ACL unchanged.
    src_metadata.acl = []
  elif not src_metadata.acl:
    raise CommandException(
        'No OWNER permission found for object %s. OWNER permission is '
        'required for rewriting objects, (otherwise their ACLs would be '
        'reset).' % transform_url)

  # Note: If other transform types are added, they must ensure that the
  # encryption key configuration matches the boto configuration, because
  # gsutil maintains an invariant that all objects it writes use the
  # encryption_key value (including decrypting if no key is present).

  # Source-side encryption facts, gathered up front to keep the logic below
  # readable.
  src_encryption_kms_key = src_metadata.kmsKeyName or None
  src_encryption_sha256 = None
  if (src_metadata.customerEncryption and
      src_metadata.customerEncryption.keySha256):
    # In python3, hashes are bytes, use ascii since it should be ascii
    src_encryption_sha256 = (
        src_metadata.customerEncryption.keySha256.encode('ascii'))
  src_was_encrypted = not (src_encryption_sha256 is None and
                           src_encryption_kms_key is None)

  # Destination-side encryption facts, taken from the boto keywrapper.
  keywrapper = self.boto_file_encryption_keywrapper
  should_encrypt_dest = keywrapper is not None
  dest_encryption_kms_key = None
  dest_encryption_sha256 = None
  if should_encrypt_dest:
    if keywrapper.crypto_type == CryptoKeyType.CMEK:
      dest_encryption_kms_key = keywrapper.crypto_key
    if keywrapper.crypto_type == CryptoKeyType.CSEK:
      dest_encryption_sha256 = keywrapper.crypto_key_sha256

  encryption_unchanged = (
      src_encryption_sha256 == dest_encryption_sha256 and
      src_encryption_kms_key == dest_encryption_kms_key)

  crypto_key_requested = _TransformTypes.CRYPTO_KEY in self.transform_types
  storage_class_requested = (
      _TransformTypes.STORAGE_CLASS in self.transform_types)

  # Prevent accidental key rotation.
  if not (crypto_key_requested or encryption_unchanged):
    raise EncryptionException(
        'The "-k" flag was not passed to the rewrite command, but the '
        'encryption_key value in your boto config file did not match the key '
        'used to encrypt the object "%s" (hash: %s). To encrypt the object '
        'using a different key, you must specify the "-k" flag.' %
        (transform_url, src_encryption_sha256))

  # Work out whether the rewrite can be skipped; that is only allowed when
  # ALL of the specified transformations are redundant.
  redundant_transforms = []

  # A STORAGE_CLASS transform is redundant when the target storage class
  # matches the existing one.
  if (storage_class_requested and
      self.dest_storage_class == NormalizeStorageClass(
          src_metadata.storageClass)):
    redundant_transforms.append('storage class')

  # A CRYPTO_KEY transform is redundant when the configured key is the one
  # already used to encrypt the source. When no encryption key is specified
  # the rewrite is still performed: the rewritten object then ends up either
  # encrypted with its bucket's default KMS key or with no CSEK/CMEK
  # encryption applied. Fetching the bucket metadata to check its default KMS
  # key first (for the apparent no-key-to-no-key case) would be vulnerable to
  # a race in which the default KMS key changes between the check and the
  # rewrite.
  if crypto_key_requested and should_encrypt_dest and encryption_unchanged:
    redundant_transforms.append('encryption key')

  if len(redundant_transforms) == len(self.transform_types):
    self.logger.info('Skipping %s, all transformations were redundant: %s' %
                     (transform_url, redundant_transforms))
    return

  # Start from a deep copy of the source metadata, then overwrite whatever
  # attributes were requested (e.g. a storage class change).
  dest_metadata = encoding.PyValueToMessage(
      apitools_messages.Object, encoding.MessageToPyValue(src_metadata))

  # Remove some unnecessary/invalid fields.
  dest_metadata.generation = None
  # Service has problems if we supply an ID, but it is responsible for
  # generating one, so it is not necessary to include it here.
  dest_metadata.id = None
  # The KMS key name and CSEK key info must not be carried over from the
  # source object; those should only come from the boto config's
  # encryption_key value.
  dest_metadata.customerEncryption = None
  dest_metadata.kmsKeyName = None

  # Storage class changes and CMEK encryption are both expressed through the
  # dest object's metadata. CSEK encryption, if specified, is added to the
  # request later via headers obtained from the keywrapper value passed to
  # encryption_tuple.
  if storage_class_requested:
    dest_metadata.storageClass = self.dest_storage_class
  if dest_encryption_kms_key is not None:
    dest_metadata.kmsKeyName = dest_encryption_kms_key

  # Make sure we have the CSEK key necessary to decrypt.
  decryption_keywrapper = None
  if src_encryption_sha256 is not None:
    if src_encryption_sha256 not in self.csek_hash_to_keywrapper:
      raise EncryptionException(
          'Missing decryption key with SHA256 hash %s. No decryption key '
          'matches object %s' % (src_encryption_sha256, transform_url))
    decryption_keywrapper = (
        self.csek_hash_to_keywrapper[src_encryption_sha256])

  operation_name = 'Rewriting'
  if crypto_key_requested:
    if src_was_encrypted and should_encrypt_dest:
      # When the encryption is unchanged the name stays "Rewriting". This
      # might occur when -k was specified and was redundant, but the
      # operation is performed anyway because some other transformation was
      # not redundant.
      if not encryption_unchanged:
        operation_name = 'Rotating'
    elif src_was_encrypted:
      operation_name = 'Decrypting'
    elif should_encrypt_dest:
      operation_name = 'Encrypting'

  # TODO: Remove this call (used to verify tests) and make it processed by
  # the UIThread.
  sys.stderr.write(
      _ConstructAnnounceText(operation_name, transform_url.url_string))
  sys.stderr.flush()

  # Message indicating beginning of operation.
  start_message = FileMessage(
      transform_url, None, time.time(), finished=False,
      size=src_metadata.size, message_type=FileMessage.FILE_REWRITE)
  gsutil_api.status_queue.put(start_message)

  progress_callback = FileProgressCallbackHandler(
      gsutil_api.status_queue, src_url=transform_url,
      operation_name=operation_name).call

  gsutil_api.CopyObject(
      src_metadata, dest_metadata,
      src_generation=transform_url.generation,
      preconditions=self.preconditions,
      progress_callback=progress_callback,
      decryption_tuple=decryption_keywrapper,
      encryption_tuple=self.boto_file_encryption_keywrapper,
      provider=transform_url.scheme, fields=[])

  # Message indicating end of operation.
  end_message = FileMessage(
      transform_url, None, time.time(), finished=True,
      size=src_metadata.size, message_type=FileMessage.FILE_REWRITE)
  gsutil_api.status_queue.put(end_message)