def testPerformanceSummaryEventCollection(self):
  """Test the collection of PerformanceSummary GA events."""
  # PerformanceSummaries are only collected for cp and rsync.
  self.collector.ga_params[metrics._GA_LABEL_MAP['Command Name']] = 'cp'

  # GetDiskCounters is called at initialization of _PerformanceSummaryParams,
  # which occurs during the first call to LogPerformanceSummaryParams.
  with mock.patch('gslib.metrics.GetDiskCounters',
                  return_value={'fake-disk': (0, 0, 0, 0, 0, 0)}):
    metrics.LogPerformanceSummaryParams(
        uses_fan=True, uses_slice=True, avg_throughput=10,
        is_daisy_chain=True, has_file_dst=False, has_cloud_dst=True,
        has_file_src=False, has_cloud_src=True, total_bytes_transferred=100,
        total_elapsed_time=10, thread_idle_time=40, thread_execution_time=10,
        num_processes=2, num_threads=3, num_objects_transferred=3,
        provider_types=['gs'])

  # Log a retryable service error and two retryable network errors.
  service_retry_msg = RetryableErrorMessage(
      apitools_exceptions.CommunicationError(), 0)
  network_retry_msg = RetryableErrorMessage(socket.error(), 0)
  metrics.LogRetryableError(service_retry_msg)
  metrics.LogRetryableError(network_retry_msg)
  metrics.LogRetryableError(network_retry_msg)

  # Log some thread throughput.
  start_file_msg = FileMessage('src', 'dst', 0, size=100)
  end_file_msg = FileMessage('src', 'dst', 10, finished=True)
  start_file_msg.thread_id = end_file_msg.thread_id = 1
  start_file_msg.process_id = end_file_msg.process_id = 1
  metrics.LogPerformanceSummaryParams(file_message=start_file_msg)
  metrics.LogPerformanceSummaryParams(file_message=end_file_msg)
  self.assertEqual(
      self.collector.perf_sum_params.thread_throughputs[
          (1, 1)].GetThroughput(), 10)

  # GetDiskCounters is called a second time during collection.
  with mock.patch('gslib.metrics.GetDiskCounters',
                  return_value={'fake-disk': (0, 0, 0, 0, 10, 10)}):
    self.collector._CollectPerformanceSummaryMetric()

  # Check for all the expected parameters.
  metric_body = self.collector._metrics[0].body
  label_and_value_pairs = [
      ('Event Category', metrics._GA_PERFSUM_CATEGORY),
      ('Event Action', 'CloudToCloud%2CDaisyChain'),
      ('Execution Time', '10'),
      ('Parallelism Strategy', 'both'),
      ('Source URL Type', 'cloud'),
      ('Provider Types', 'gs'),
      ('Num Processes', '2'),
      ('Num Threads', '3'),
      ('Number of Files/Objects Transferred', '3'),
      ('Size of Files/Objects Transferred', '100'),
      ('Average Overall Throughput', '10'),
      ('Num Retryable Service Errors', '1'),
      ('Num Retryable Network Errors', '2'),
      ('Thread Idle Time Percent', '0.8'),
      ('Slowest Thread Throughput', '10'),
      ('Fastest Thread Throughput', '10'),
  ]
  if IS_LINUX:  # Disk I/O time is only available on Linux.
    label_and_value_pairs.append(('Disk I/O Time', '20'))
  for label, exp_value in label_and_value_pairs:
    self.assertIn('{0}={1}'.format(metrics._GA_LABEL_MAP[label], exp_value),
                  metric_body)
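# For context on the throughput assertion above: 100 bytes over 10 seconds on
# thread (1, 1) yields 10 bytes/sec. Below is a minimal standalone sketch of
# per-thread throughput bookkeeping like perf_sum_params.thread_throughputs;
# the class is hypothetical and only models the (process_id, thread_id) keying
# and the GetThroughput() arithmetic exercised by the test, not gsutil's
# actual internals.
import collections


class _SketchThreadThroughput(object):
  """Tracks bytes moved and busy time for one (process_id, thread_id) pair."""

  def __init__(self):
    self.total_bytes = 0
    self.total_time = 0.0
    self._start_time = None

  def Start(self, start_time):
    self._start_time = start_time

  def Finish(self, end_time, num_bytes):
    self.total_time += end_time - self._start_time
    self.total_bytes += num_bytes
    self._start_time = None

  def GetThroughput(self):
    return self.total_bytes / self.total_time if self.total_time else 0.0


_throughputs = collections.defaultdict(_SketchThreadThroughput)
_throughputs[(1, 1)].Start(0)  # corresponds to start_file_msg above
_throughputs[(1, 1)].Finish(10, num_bytes=100)  # corresponds to end_file_msg
assert _throughputs[(1, 1)].GetThroughput() == 10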
def RunCommand(self):
  """Command entry point for the hash command."""
  (calc_crc32c, calc_md5, format_func, cloud_format_func, output_format) = (
      self._ParseOpts(self.sub_opts, self.logger))

  matched_one = False
  for url_str in self.args:
    for file_ref in self.WildcardIterator(url_str).IterObjects(
        bucket_listing_fields=['crc32c', 'md5Hash', 'customerEncryption',
                               'size']):
      matched_one = True
      url = StorageUrlFromString(url_str)
      file_name = file_ref.storage_url.object_name
      if StorageUrlFromString(url_str).IsFileUrl():
        file_size = os.path.getsize(file_name)
        self.gsutil_api.status_queue.put(
            FileMessage(url, None, time.time(), size=file_size,
                        finished=False, message_type=FileMessage.FILE_HASH))
        callback_processor = ProgressCallbackWithTimeout(
            file_size,
            FileProgressCallbackHandler(
                self.gsutil_api.status_queue,
                src_url=StorageUrlFromString(url_str),
                operation_name='Hashing').call)
        hash_dict = self._GetHashClassesFromArgs(calc_crc32c, calc_md5)
        with open(file_name, 'rb') as fp:
          CalculateHashesFromContents(fp, hash_dict,
                                      callback_processor=callback_processor)
        self.gsutil_api.status_queue.put(
            FileMessage(url, None, time.time(), size=file_size,
                        finished=True, message_type=FileMessage.FILE_HASH))
      else:
        hash_dict = {}
        obj_metadata = file_ref.root_object
        file_size = obj_metadata.size
        md5_present = obj_metadata.md5Hash is not None
        crc32c_present = obj_metadata.crc32c is not None
        if not md5_present and not crc32c_present:
          logging.getLogger().warn('No hashes present for %s', url_str)
          continue
        if md5_present:
          hash_dict['md5'] = obj_metadata.md5Hash
        if crc32c_present:
          hash_dict['crc32c'] = obj_metadata.crc32c
      print 'Hashes [%s] for %s:' % (output_format, file_name)
      for name, digest in hash_dict.iteritems():
        print '\tHash (%s):\t\t%s' % (name,
                                      (format_func(digest) if url.IsFileUrl()
                                       else cloud_format_func(digest)))

  if not matched_one:
    raise CommandException('No files matched')
  PutToQueueWithTimeout(self.gsutil_api.status_queue,
                        FinalMessage(time.time()))
  return 0
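# CalculateHashesFromContents above reads the local file in chunks, feeds each
# chunk to every digester in hash_dict, and notifies the progress callback.
# Below is a minimal sketch of that chunked-hashing pattern using only the
# standard library; the helper name and chunk size are illustrative, not
# gsutil's actual constants (gsutil obtains its crc32c digester from the
# crcmod package).
import hashlib


def _sketch_calculate_hashes(fp, hash_dict, chunk_size=8192,
                             progress_callback=None):
  """Feeds fp's contents to every digester in hash_dict, chunk by chunk."""
  while True:
    chunk = fp.read(chunk_size)
    if not chunk:
      break
    for digester in hash_dict.values():
      digester.update(chunk)
    if progress_callback:
      progress_callback(len(chunk))


# Usage: all digesters share a single pass over the file, however large it is.
_digesters = {'md5': hashlib.md5()}
with open('/etc/hosts', 'rb') as _fp:
  _sketch_calculate_hashes(_fp, _digesters)
print(_digesters['md5'].hexdigest())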
def RewriteFunc(self, name_expansion_result, thread_state=None):
  gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)
  transform_url = name_expansion_result.expanded_storage_url
  # Make a local copy of the requested transformations for each thread. As
  # a redundant transformation for one object might not be redundant for
  # another, we wouldn't want to remove it from the transform_types set that
  # all threads share.
  transforms_to_perform = set(self.transform_types)

  self.CheckProvider(transform_url)

  # Get all fields so that we can ensure that the target metadata is
  # specified correctly.
  src_metadata = gsutil_api.GetObjectMetadata(
      transform_url.bucket_name, transform_url.object_name,
      generation=transform_url.generation, provider=transform_url.scheme)

  if self.no_preserve_acl:
    # Leave ACL unchanged.
    src_metadata.acl = []
  elif not src_metadata.acl:
    raise CommandException(
        'No OWNER permission found for object %s. OWNER permission is '
        'required for rewriting objects, (otherwise their ACLs would be '
        'reset).' % transform_url)

  # Note: If other transform types are added, they must ensure that the
  # encryption key configuration matches the boto configuration, because
  # gsutil maintains an invariant that all objects it writes use the
  # encryption_key value (including decrypting if no key is present).
  src_encryption_sha256 = None
  if (src_metadata.customerEncryption and
      src_metadata.customerEncryption.keySha256):
    src_encryption_sha256 = src_metadata.customerEncryption.keySha256

  should_encrypt_target = self.boto_file_encryption_sha256 is not None
  source_was_encrypted = src_encryption_sha256 is not None
  using_same_encryption_key_value = (
      src_encryption_sha256 == self.boto_file_encryption_sha256)

  # Prevent accidental key rotation.
  if (_TransformTypes.CRYPTO_KEY not in transforms_to_perform and
      not using_same_encryption_key_value):
    raise EncryptionException(
        'The "-k" flag was not passed to the rewrite command, but the '
        'encryption_key value in your boto config file did not match the key '
        'used to encrypt the object "%s" (hash: %s). To encrypt the object '
        'using a different key, you must specify the "-k" flag.' %
        (transform_url, src_encryption_sha256))

  # Remove any redundant changes.

  # STORAGE_CLASS transform should be skipped if the target storage class
  # matches the existing storage class.
  if (_TransformTypes.STORAGE_CLASS in transforms_to_perform and
      self.dest_storage_class == NormalizeStorageClass(
          src_metadata.storageClass)):
    transforms_to_perform.remove(_TransformTypes.STORAGE_CLASS)
    self.logger.info('Redundant transform: %s already had storage class of '
                     '%s.' % (transform_url, src_metadata.storageClass))

  # CRYPTO_KEY transform should be skipped if we're using the same encryption
  # key (if any) that was used to encrypt the source.
  if (_TransformTypes.CRYPTO_KEY in transforms_to_perform and
      using_same_encryption_key_value):
    if self.boto_file_encryption_sha256 is None:
      log_msg = '%s is already decrypted.' % transform_url
    else:
      log_msg = '%s already has current encryption key.' % transform_url
    transforms_to_perform.remove(_TransformTypes.CRYPTO_KEY)
    self.logger.info('Redundant transform: %s' % log_msg)

  if not transforms_to_perform:
    self.logger.info(
        'Skipping %s, all transformations were redundant.' % transform_url)
    return

  # Make a deep copy of the source metadata.
  dst_metadata = encoding.PyValueToMessage(
      apitools_messages.Object, encoding.MessageToPyValue(src_metadata))

  # Remove some unnecessary/invalid fields.
  dst_metadata.customerEncryption = None
  dst_metadata.generation = None
  # Service has problems if we supply an ID, but it is responsible for
  # generating one, so it is not necessary to include it here.
  dst_metadata.id = None

  decryption_tuple = None
  # Use a generic operation name by default - this can be altered below for
  # specific transformations (encryption changes, etc.).
  operation_name = 'Rewriting'
  if source_was_encrypted:
    decryption_key = FindMatchingCryptoKey(src_encryption_sha256)
    if not decryption_key:
      raise EncryptionException(
          'Missing decryption key with SHA256 hash %s. No decryption key '
          'matches object %s' % (src_encryption_sha256, transform_url))
    decryption_tuple = CryptoTupleFromKey(decryption_key)

  if _TransformTypes.CRYPTO_KEY in transforms_to_perform:
    if not source_was_encrypted:
      operation_name = 'Encrypting'
    elif not should_encrypt_target:
      operation_name = 'Decrypting'
    else:
      operation_name = 'Rotating'

  if _TransformTypes.STORAGE_CLASS in transforms_to_perform:
    dst_metadata.storageClass = self.dest_storage_class

  # TODO: Remove this call (used to verify tests) and make it processed by
  # the UIThread.
  sys.stderr.write(
      _ConstructAnnounceText(operation_name, transform_url.url_string))

  # Message indicating beginning of operation.
  gsutil_api.status_queue.put(
      FileMessage(transform_url, None, time.time(), finished=False,
                  size=src_metadata.size,
                  message_type=FileMessage.FILE_REWRITE))

  progress_callback = FileProgressCallbackHandler(
      gsutil_api.status_queue, src_url=transform_url,
      operation_name=operation_name).call

  gsutil_api.CopyObject(
      src_metadata, dst_metadata, src_generation=transform_url.generation,
      preconditions=self.preconditions, progress_callback=progress_callback,
      decryption_tuple=decryption_tuple,
      encryption_tuple=self.boto_file_encryption_tuple,
      provider=transform_url.scheme, fields=[])

  # Message indicating end of operation.
  gsutil_api.status_queue.put(
      FileMessage(transform_url, None, time.time(), finished=True,
                  size=src_metadata.size,
                  message_type=FileMessage.FILE_REWRITE))
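# The operation-name selection above is a small decision table over whether
# the source object was encrypted and whether the rewritten object should be.
# Below is a pure-function sketch mirroring the branch order in RewriteFunc;
# the function name is illustrative, and in the real code these flags come
# from the object's metadata and the boto config's encryption_key value.
def _sketch_pick_operation_name(crypto_transform_requested,
                                source_was_encrypted, should_encrypt_target):
  if not crypto_transform_requested:
    return 'Rewriting'  # generic default, e.g. a storage-class-only change
  if not source_was_encrypted:
    return 'Encrypting'
  if not should_encrypt_target:
    return 'Decrypting'
  return 'Rotating'


assert _sketch_pick_operation_name(True, False, True) == 'Encrypting'
assert _sketch_pick_operation_name(True, True, False) == 'Decrypting'
assert _sketch_pick_operation_name(True, True, True) == 'Rotating'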
def RewriteFunc(self, name_expansion_result, thread_state=None):
  gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)
  transform_url = name_expansion_result.expanded_storage_url

  self.CheckProvider(transform_url)

  # Get all fields so that we can ensure that the target metadata is
  # specified correctly.
  src_metadata = gsutil_api.GetObjectMetadata(
      transform_url.bucket_name, transform_url.object_name,
      generation=transform_url.generation, provider=transform_url.scheme)

  if self.no_preserve_acl:
    # Leave ACL unchanged.
    src_metadata.acl = []
  elif not src_metadata.acl:
    raise CommandException(
        'No OWNER permission found for object %s. OWNER permission is '
        'required for rewriting objects, (otherwise their ACLs would be '
        'reset).' % transform_url)

  # Note: If other transform types are added, they must ensure that the
  # encryption key configuration matches the boto configuration, because
  # gsutil maintains an invariant that all objects it writes use the
  # encryption_key value (including decrypting if no key is present).

  # Store metadata about src encryption to make logic below easier to read.
  src_encryption_kms_key = (src_metadata.kmsKeyName
                            if src_metadata.kmsKeyName else None)

  src_encryption_sha256 = None
  if (src_metadata.customerEncryption and
      src_metadata.customerEncryption.keySha256):
    src_encryption_sha256 = src_metadata.customerEncryption.keySha256
    # In python3, hashes are bytes; encode as ascii, since base64-encoded
    # hashes should only contain ascii characters.
    src_encryption_sha256 = src_encryption_sha256.encode('ascii')

  src_was_encrypted = (src_encryption_sha256 is not None or
                       src_encryption_kms_key is not None)

  # Also store metadata about dest encryption.
  dest_encryption_kms_key = None
  if (self.boto_file_encryption_keywrapper is not None and
      self.boto_file_encryption_keywrapper.crypto_type == CryptoKeyType.CMEK):
    dest_encryption_kms_key = self.boto_file_encryption_keywrapper.crypto_key

  dest_encryption_sha256 = None
  if (self.boto_file_encryption_keywrapper is not None and
      self.boto_file_encryption_keywrapper.crypto_type == CryptoKeyType.CSEK):
    dest_encryption_sha256 = (
        self.boto_file_encryption_keywrapper.crypto_key_sha256)

  should_encrypt_dest = self.boto_file_encryption_keywrapper is not None

  encryption_unchanged = (src_encryption_sha256 == dest_encryption_sha256 and
                          src_encryption_kms_key == dest_encryption_kms_key)

  # Prevent accidental key rotation.
  if (_TransformTypes.CRYPTO_KEY not in self.transform_types and
      not encryption_unchanged):
    raise EncryptionException(
        'The "-k" flag was not passed to the rewrite command, but the '
        'encryption_key value in your boto config file did not match the key '
        'used to encrypt the object "%s" (hash: %s). To encrypt the object '
        'using a different key, you must specify the "-k" flag.' %
        (transform_url, src_encryption_sha256))

  # Determine if we can skip this rewrite operation (this should only be done
  # when ALL of the specified transformations are redundant).
  redundant_transforms = []

  # STORAGE_CLASS transform is redundant if the target storage class matches
  # the existing storage class.
  if (_TransformTypes.STORAGE_CLASS in self.transform_types and
      self.dest_storage_class == NormalizeStorageClass(
          src_metadata.storageClass)):
    redundant_transforms.append('storage class')

  # CRYPTO_KEY transform is redundant if we're using the same encryption key
  # that was used to encrypt the source. However, if no encryption key was
  # specified, we should still perform the rewrite. This results in the
  # rewritten object either being encrypted with its bucket's default KMS key
  # or having no CSEK/CMEK encryption applied. While we could attempt fetching
  # the bucket's metadata and checking its default KMS key before performing
  # the rewrite (in the case where we appear to be transitioning from
  # no key to no key), that is vulnerable to the race condition where the
  # default KMS key is changed between when we check it and when we rewrite
  # the object.
  if (_TransformTypes.CRYPTO_KEY in self.transform_types and
      should_encrypt_dest and encryption_unchanged):
    redundant_transforms.append('encryption key')

  if len(redundant_transforms) == len(self.transform_types):
    self.logger.info('Skipping %s, all transformations were redundant: %s' %
                     (transform_url, redundant_transforms))
    return

  # First make a deep copy of the source metadata, then overwrite any
  # requested attributes (e.g. if a storage class change was specified).
  dest_metadata = encoding.PyValueToMessage(
      apitools_messages.Object, encoding.MessageToPyValue(src_metadata))

  # Remove some unnecessary/invalid fields.
  dest_metadata.generation = None
  # Service has problems if we supply an ID, but it is responsible for
  # generating one, so it is not necessary to include it here.
  dest_metadata.id = None
  # Ensure we don't copy over the KMS key name or CSEK key info from the
  # source object; those should only come from the boto config's
  # encryption_key value.
  dest_metadata.customerEncryption = None
  dest_metadata.kmsKeyName = None

  # Both a storage class change and CMEK encryption should be set as part of
  # the dest object's metadata. CSEK encryption, if specified, is added to the
  # request later via headers obtained from the keywrapper value passed to
  # encryption_tuple.
  if _TransformTypes.STORAGE_CLASS in self.transform_types:
    dest_metadata.storageClass = self.dest_storage_class
  if dest_encryption_kms_key is not None:
    dest_metadata.kmsKeyName = dest_encryption_kms_key

  # Make sure we have the CSEK key necessary to decrypt.
  decryption_keywrapper = None
  if src_encryption_sha256 is not None:
    if src_encryption_sha256 in self.csek_hash_to_keywrapper:
      decryption_keywrapper = (
          self.csek_hash_to_keywrapper[src_encryption_sha256])
    else:
      raise EncryptionException(
          'Missing decryption key with SHA256 hash %s. No decryption key '
          'matches object %s' % (src_encryption_sha256, transform_url))

  operation_name = 'Rewriting'
  if _TransformTypes.CRYPTO_KEY in self.transform_types:
    if src_was_encrypted and should_encrypt_dest:
      if not encryption_unchanged:
        operation_name = 'Rotating'
      # Else, keep "Rewriting". This might occur when -k was specified and was
      # redundant, but we're performing the operation anyway because some
      # other transformation was not redundant.
    elif src_was_encrypted and not should_encrypt_dest:
      operation_name = 'Decrypting'
    elif not src_was_encrypted and should_encrypt_dest:
      operation_name = 'Encrypting'

  # TODO: Remove this call (used to verify tests) and make it processed by
  # the UIThread.
  sys.stderr.write(
      _ConstructAnnounceText(operation_name, transform_url.url_string))
  sys.stderr.flush()

  # Message indicating beginning of operation.
  gsutil_api.status_queue.put(
      FileMessage(transform_url, None, time.time(), finished=False,
                  size=src_metadata.size,
                  message_type=FileMessage.FILE_REWRITE))

  progress_callback = FileProgressCallbackHandler(
      gsutil_api.status_queue, src_url=transform_url,
      operation_name=operation_name).call

  gsutil_api.CopyObject(
      src_metadata, dest_metadata, src_generation=transform_url.generation,
      preconditions=self.preconditions, progress_callback=progress_callback,
      decryption_tuple=decryption_keywrapper,
      encryption_tuple=self.boto_file_encryption_keywrapper,
      provider=transform_url.scheme, fields=[])

  # Message indicating end of operation.
  gsutil_api.status_queue.put(
      FileMessage(transform_url, None, time.time(), finished=True,
                  size=src_metadata.size,
                  message_type=FileMessage.FILE_REWRITE))
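# Unlike the older RewriteFunc above, this version does not mutate a
# per-thread copy of the transform set; it tallies redundant transforms and
# skips the rewrite only when EVERY requested transform is redundant. Below is
# a compact sketch of that check; the string transform names stand in for the
# _TransformTypes constants.
def _sketch_should_skip_rewrite(transform_types, storage_class_redundant,
                                crypto_key_redundant):
  """Returns True only when ALL requested transformations are redundant."""
  redundant = []
  if 'STORAGE_CLASS' in transform_types and storage_class_redundant:
    redundant.append('storage class')
  if 'CRYPTO_KEY' in transform_types and crypto_key_redundant:
    redundant.append('encryption key')
  return len(redundant) == len(transform_types)


# Both transforms requested but only one redundant: the rewrite still runs,
# which is why operation_name may stay 'Rewriting' even when -k was passed.
assert not _sketch_should_skip_rewrite(
    {'STORAGE_CLASS', 'CRYPTO_KEY'}, True, False)
assert _sketch_should_skip_rewrite({'CRYPTO_KEY'}, False, True)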
def CryptoRewrite(self, transform_url, gsutil_api):
  """Make the cloud object at transform_url match encryption configuration.

  Args:
    transform_url: CloudUrl to rewrite.
    gsutil_api: gsutil CloudApi instance for making API calls.
  """
  # Get all fields so that we can ensure that the target metadata is
  # specified correctly.
  src_metadata = gsutil_api.GetObjectMetadata(
      transform_url.bucket_name, transform_url.object_name,
      generation=transform_url.generation, provider=transform_url.scheme)

  if self.no_preserve_acl:
    # Leave ACL unchanged.
    src_metadata.acl = []
  elif not src_metadata.acl:
    raise CommandException(
        'No OWNER permission found for object %s. OWNER permission is '
        'required for rewriting objects, (otherwise their ACLs would be '
        'reset).' % transform_url)

  src_encryption_sha256 = None
  if (src_metadata.customerEncryption and
      src_metadata.customerEncryption.keySha256):
    src_encryption_sha256 = src_metadata.customerEncryption.keySha256

  if src_encryption_sha256 == self.current_encryption_sha256:
    if self.current_encryption_sha256 is not None:
      self.logger.info(
          'Skipping %s, already has current encryption key' % transform_url)
    else:
      self.logger.info('Skipping %s, already decrypted' % transform_url)
  else:
    # Make a deep copy of the source metadata.
    dst_metadata = encoding.PyValueToMessage(
        apitools_messages.Object, encoding.MessageToPyValue(src_metadata))

    # Remove some unnecessary/invalid fields.
    dst_metadata.customerEncryption = None
    dst_metadata.generation = None
    # Service has problems if we supply an ID, but it is responsible for
    # generating one, so it is not necessary to include it here.
    dst_metadata.id = None

    decryption_tuple = None
    if src_encryption_sha256 is None:
      operation_name = 'Encrypting'
    else:
      decryption_key = FindMatchingCryptoKey(src_encryption_sha256)
      if not decryption_key:
        raise EncryptionException(
            'Missing decryption key with SHA256 hash %s. No decryption key '
            'matches object %s' % (src_encryption_sha256, transform_url))
      decryption_tuple = CryptoTupleFromKey(decryption_key)
      if self.current_encryption_sha256 is None:
        operation_name = 'Decrypting'
      else:
        operation_name = 'Rotating'

    # TODO: Remove this call (used to verify tests) and make it processed by
    # the UIThread.
    sys.stderr.write(
        _ConstructAnnounceText(operation_name, transform_url.url_string))

    # Message indicating beginning of operation.
    gsutil_api.status_queue.put(
        FileMessage(transform_url, None, time.time(), finished=False,
                    size=src_metadata.size,
                    message_type=FileMessage.FILE_REWRITE))

    progress_callback = FileProgressCallbackHandler(
        gsutil_api.status_queue, src_url=transform_url,
        operation_name=operation_name).call

    gsutil_api.CopyObject(
        src_metadata, dst_metadata,
        src_generation=transform_url.generation,
        preconditions=self.preconditions,
        progress_callback=progress_callback,
        decryption_tuple=decryption_tuple,
        encryption_tuple=self.current_encryption_tuple,
        provider=transform_url.scheme, fields=[])

    # Message indicating end of operation.
    gsutil_api.status_queue.put(
        FileMessage(transform_url, None, time.time(), finished=True,
                    size=src_metadata.size,
                    message_type=FileMessage.FILE_REWRITE))
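# FindMatchingCryptoKey above locates a configured decryption key whose SHA256
# matches the object's customerEncryption.keySha256. Below is a standalone
# sketch of that matching, assuming candidate CSEKs are base64 strings and
# that keySha256 is the base64-encoded SHA256 of the raw (decoded) key bytes,
# per the GCS customer-supplied encryption key scheme; the function names are
# illustrative, not gsutil's.
import base64
import hashlib


def _sketch_csek_hash(b64_key):
  """Base64-encoded SHA256 of the raw CSEK bytes."""
  raw_key = base64.b64decode(b64_key)
  return base64.b64encode(hashlib.sha256(raw_key).digest()).decode('ascii')


def _sketch_find_matching_crypto_key(target_sha256, candidate_b64_keys):
  """Returns the first candidate whose hash matches target_sha256, else None."""
  for candidate in candidate_b64_keys:
    if _sketch_csek_hash(candidate) == target_sha256:
      return candidate
  return None


_key = base64.b64encode(b'0' * 32).decode('ascii')  # a 32-byte AES-256 key
assert _sketch_find_matching_crypto_key(_sketch_csek_hash(_key), [_key]) == _key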