def __init__(self):
  """Sets every tracked field to its initial, empty value.

  Counters start at zero, flags start out False, and values that are only
  known after a transfer has run start out as None. On Linux a snapshot of
  the disk counters is also taken; on other platforms the
  disk_counters_start attribute is not created at all.
  """
  # Process/thread counts and retry tallies.
  self.num_processes = self.num_threads = 0
  self.num_retryable_service_errors = 0
  self.num_retryable_network_errors = 0
  self.provider_types = set()

  # Snapshot the disk stats at the start of the command so the time spent on
  # disk I/O can later be computed as a delta against a second snapshot.
  if system_util.IS_LINUX:
    self.disk_counters_start = system_util.GetDiskCounters()

  # Parallelism flags: uses_fan is True if using fan parallelism (when the
  # user specifies the -m option); uses_slice is True if the command uses
  # slice parallelism.
  self.uses_fan = self.uses_slice = False

  # Total times, in seconds, that threads spent idle and executing.
  self.thread_idle_time = self.thread_execution_time = 0

  # Maps (process id, thread id) to a _ThreadThroughputInformation object,
  # keeping track of elapsed time and bytes processed. Entries are created
  # on first access because this is a defaultdict.
  self.thread_throughputs = defaultdict(self._ThreadThroughputInformation)

  # Data transfer statistics. total_elapsed_time is the amount of time spent
  # on the Apply call of cp and rsync.
  self.avg_throughput = None
  self.total_elapsed_time = None
  self.total_bytes_transferred = None
  self.num_objects_transferred = 0

  # Information used to determine the type of transfer.
  self.is_daisy_chain = False
  self.has_file_dst = self.has_cloud_dst = False
  self.has_file_src = self.has_cloud_src = False
def _CollectPerformanceSummaryMetric(self):
  """Builds the PerformanceSummary metric and passes it to CollectGAMetric.

  Returns immediately, without collecting anything, when
  self.perf_sum_params is None. Otherwise the recorded parameters are
  converted into custom parameters keyed through _GA_LABEL_MAP, the transfer
  types that occurred become the event action, and the time spent in Apply
  is reported as the execution time.
  """
  params = self.perf_sum_params
  if params is None:
    return

  def _TotalIoTime(counters):
    """Sums read time plus write time over every entry of a counters map."""
    # Read and write time are the 5th and 6th elements of the stat tuple.
    return sum(stat[4] + stat[5] for stat in counters.values())

  ga_params = {}

  # Attributes that are reported as-is, paired with their label.
  passthrough = (
      ('num_processes', 'Num Processes'),
      ('num_threads', 'Num Threads'),
      ('num_retryable_service_errors', 'Num Retryable Service Errors'),
      ('num_retryable_network_errors', 'Num Retryable Network Errors'),
      ('avg_throughput', 'Average Overall Throughput'),
      ('num_objects_transferred', 'Number of Files/Objects Transferred'),
      ('total_bytes_transferred', 'Size of Files/Objects Transferred'),
  )
  for attribute, ga_label in passthrough:
    ga_params[_GA_LABEL_MAP[ga_label]] = getattr(params, attribute)

  # Disk I/O time is the difference between a fresh disk-counter snapshot and
  # the one stored in the params. Only attempted on Linux.
  if system_util.IS_LINUX:
    counters_before = params.disk_counters_start
    counters_after = system_util.GetDiskCounters()
    ga_params[_GA_LABEL_MAP['Disk I/O Time']] = (
        _TotalIoTime(counters_after) - _TotalIoTime(counters_before))

  # Source URL type: 'file' unless a cloud source was seen, in which case it
  # is 'both' when a file source was seen as well and 'cloud' otherwise.
  if not params.has_cloud_src:
    source_kind = 'file'
  elif params.has_file_src:
    source_kind = 'both'
  else:
    source_kind = 'cloud'
  ga_params[_GA_LABEL_MAP['Source URL Type']] = source_kind

  # Parallelism strategy, looked up by (uses fan, uses slice).
  strategy_names = {
      (True, True): 'both',
      (True, False): 'fan',
      (False, True): 'slice',
      (False, False): 'none',
  }
  ga_params[_GA_LABEL_MAP['Parallelism Strategy']] = strategy_names[(
      bool(params.uses_fan), bool(params.uses_slice))]

  # Share of thread time spent idle. The value is the plain ratio
  # idle / (idle + executing); it is not multiplied by 100. Skipped when the
  # combined time is zero so there is no division by zero.
  idle_time = params.thread_idle_time
  combined_time = idle_time + params.thread_execution_time
  if combined_time:
    ga_params[_GA_LABEL_MAP['Thread Idle Time Percent']] = (
        float(idle_time) / float(combined_time))

  # Slowest and fastest per-thread throughput, only when at least one thread
  # has an entry.
  per_thread = params.thread_throughputs
  if per_thread:
    rates = [info.GetThroughput() for info in per_thread.values()]
    ga_params[_GA_LABEL_MAP['Slowest Thread Throughput']] = min(rates)
    ga_params[_GA_LABEL_MAP['Fastest Thread Throughput']] = max(rates)

  # Provider(s) used, as a sorted comma-separated string.
  ga_params[_GA_LABEL_MAP['Provider Types']] = ','.join(
      sorted(params.provider_types))

  # Each transfer type paired with whether its condition has been met. The
  # action is the sorted, comma-separated list of the types that were met.
  transfer_conditions = (
      ('CloudToCloud', params.has_cloud_src and params.has_cloud_dst),
      ('CloudToFile', params.has_cloud_src and params.has_file_dst),
      ('DaisyChain', params.is_daisy_chain),
      ('FileToCloud', params.has_file_src and params.has_cloud_dst),
      ('FileToFile', params.has_file_src and params.has_file_dst),
  )
  action = ','.join(
      sorted(name for name, met in transfer_conditions if met))

  # The execution time reported here is the time spent on Apply, not the
  # total command execution time. That lines up better with throughput and
  # bytes transferred, and the corresponding Command event already carries
  # the total time. If PerformanceSummary events are expanded, this may stop
  # reflecting a single Apply call, since commands like rm may call Apply
  # twice. Currently Apply is timed directly in the RunCommand methods of cp
  # and rsync.
  apply_execution_time = _GetTimeInMillis(params.total_elapsed_time)

  self.CollectGAMetric(
      category=_GA_PERFSUM_CATEGORY,
      action=action,
      execution_time=apply_execution_time,
      **ga_params)