def transform_to_fixed_size_buckets(percentages_histogram, expected_buckets=200):
    """
    Normalize a middleware histogram into a fixed number of 100µs buckets.
    :param percentages_histogram: Histogram as returned per middleware in format
    :param expected_buckets: Number of buckets expected in the histogram
    :return: Histogram which has 100µs buckets between [0, expected_buckets/10 - 0.1] ms
    """
    # Pre-fill every bucket key with 0.0 so all expected buckets exist in the output.
    normalized = {StdLib.get_rounded_double(bucket / 10): 0.0
                  for bucket in range(expected_buckets)}

    upper_bound_ms = expected_buckets / 10
    overflow = 0
    for key, percentage in percentages_histogram.items():
        if key >= upper_bound_ms:
            # Anything at or beyond the covered range is collapsed into the last bucket below.
            overflow += percentage
        else:
            normalized[StdLib.get_rounded_double(key)] = percentage

    # Fold the accumulated out-of-range mass into the final bucket.
    final_bucket = StdLib.get_rounded_double((expected_buckets - 1) / 10)
    normalized[final_bucket] += overflow
    return normalized
def parse_result_line(line):
    """
    Parse a single memtier summary line into a result dictionary.
    Non-valid numbers are mapped to the default value 0.0 by StdLib.get_sane_double.
    :param line: Line to parse (guaranteed by caller to be a valid summary line).
    :return: Dictionary of parsed doubles; 'Hits'/'Misses' only present for 'Gets' lines.
    """
    r_type, throughput, hits, misses, response_time, data_throughput = line.split()

    parsed = {'Request_Throughput': StdLib.get_sane_double(throughput)}
    if r_type == 'Gets':
        # Only GET summary lines carry meaningful hit/miss counters.
        parsed['Hits'] = StdLib.get_sane_double(hits)
        parsed['Misses'] = StdLib.get_sane_double(misses)
    parsed['Response_Time'] = StdLib.get_sane_double(response_time)
    parsed['Data_Throughput'] = StdLib.get_sane_double(data_throughput)
    return parsed
def parse_histogram_entry(line):
    """
    Parse a single memtier histogram line.
    :param line: Line to parse (guaranteed by caller to be a valid histogram line).
    :return: Tuple (bucket, cdf_until_and_including_bucket) as doubles.
    """
    # First token is the request type marker (GET/SET) and is not needed here.
    _, raw_bucket, raw_cdf = line.split()
    return (StdLib.get_sane_double(raw_bucket),
            StdLib.get_sane_double(raw_cdf))
def normalize(self, entry, factor):
    """
    Scale the observed metrics of the given entry type by its throughput/factor ratio.
    Mutates self.get_observed/self.get_interactive (for 'GET') or
    self.set_observed/self.set_interactive (for 'SET') in place.
    :param entry: One of 'GET', 'SET', 'Histogram_GET', 'Histogram_SET'.
    :param factor: Divisor applied to the observed request throughput to build the multiplier.
    :return: Nothing
    """
    if entry in ('Histogram_GET', 'Histogram_SET'):
        # Histogram normalization is not supported yet.
        print("Not implemented")
    if entry == 'GET' and self.get_observed['Request_Throughput'] is not None:
        scale = self.get_observed['Request_Throughput'] / factor
        for metric in ('Request_Size', 'Queue_Waiting_Time',
                       'Memcached_Communication', 'Response_Time'):
            if metric in self.get_observed:
                self.get_observed[metric] *= scale
        key_distribution = self.get_observed.get('Key_Distribution')
        if key_distribution is not None:
            self.get_observed['Key_Distribution'] = tuple(
                StdLib.get_sane_double(value) * scale for value in key_distribution)
        self.get_interactive['Response_Time'] *= scale
    if entry == 'SET' and self.set_observed['Request_Throughput'] is not None:
        scale = self.set_observed['Request_Throughput'] / factor
        # 'Queue_Size' intentionally excluded from scaling (was commented out originally).
        for metric in ('Queue_Waiting_Time', 'Memcached_Communication', 'Response_Time'):
            if metric in self.set_observed:
                self.set_observed[metric] *= scale
        self.set_interactive['Response_Time'] *= scale
def parse_file(base_path, ping_target):
    """
    Parse a ping log and return the average round-trip time in milliseconds.
    :param base_path: Path-like object for the directory holding the ping log.
    :param ping_target: Basename of the ping log (the '.ping' suffix is appended here).
    :return: Average latency in ms, or 0.0 if no samples were found.
    """
    ping_history = []
    filename = base_path.joinpath(ping_target + ".ping")
    # Read in all lines; ping output puts the time in the 4th '='-separated field
    # (e.g. "... time=0.123 ms").
    with open(filename, "r") as file:
        for line in file:
            splits = line.split('=')
            if len(splits) > 3:
                ping_info = splits[3].split()
                if ping_info[1] == 'ms':
                    ping_history.append(StdLib.get_sane_double(ping_info[0]))
                elif ping_info[1] == 's':
                    # Normalize seconds to milliseconds.
                    ping_history.append(1000 * StdLib.get_sane_double(ping_info[0]))
                else:
                    # BUG FIX: the original passed the values as extra print() arguments,
                    # so the literal '{} {}' placeholders were printed instead of the values.
                    print("Unexpected unit, got value {} {}".format(ping_info[0], ping_info[1]))
    # BUG FIX: guard against an empty/unmatched log (previously ZeroDivisionError).
    if not ping_history:
        return 0.0
    return sum(ping_history) / len(ping_history)
def transform_to_fixed_size_buckets(percentages_histogram, expected_buckets=200):
    """
    Returns for given histogram to a normalized histogram in term of buckets.
    :param percentages_histogram: Histogram as returned per memtier in format
    :param expected_buckets: Number of buckets expected in the histogram
    :return: Histogram which has 100µs buckets between [0, expected_buckets/10 - 0.1] ms
    """
    result = {}
    # Default initialize the result-dictionary to our needs
    for i in range(expected_buckets):
        result[StdLib.get_rounded_double(i / 10)] = 0.0
    # Running accumulators: mass observed below 1 ms that has not yet been flushed
    # into a 0.1 ms bucket, mass beyond the covered range, and the last sub-1ms key seen.
    sum_below_range = 0
    sum_beyond_range = 0
    current_below = 0
    # Fill in the observed values to the dictionary
    for item in percentages_histogram:
        if item >= expected_buckets / 10:
            # Accumulate anything above X milliseconds
            sum_beyond_range += percentages_histogram.get(item)
            continue
        if item <= 1:
            # NOTE(review): memtier reports sub-1ms entries at finer granularity than
            # the 0.1 ms target buckets; entries are coalesced until the integer
            # tenth-of-ms bucket advances, then the accumulated mass is flushed.
            if int(item * 10) - int(current_below * 10) > 0.99:
                if not current_below == 0.0:
                    # Flush the accumulated mass into the bucket we just left.
                    index = StdLib.get_rounded_double(int(current_below * 10) / 10)
                    result[index] = sum_below_range
                # Start accumulating for the new bucket.
                sum_below_range = percentages_histogram.get(item)
                current_below = item
                if item == 1:
                    # Exactly 1 ms is written directly and the sub-1ms state is reset.
                    current_below = 0
                    sum_below_range = 0
                    index = StdLib.get_rounded_double(item)
                    result[index] = percentages_histogram.get(item)
            else:
                # Still inside the same 0.1 ms bucket: keep accumulating.
                sum_below_range += percentages_histogram.get(item)
            continue
        # Try to expand buckets and divide respective percentages evenly -> uniform distribution inside buckets
        bucket_count = int(
            math.pow(10, math.ceil(math.log10(item + 0.1)) - 1))
        bucket_value = StdLib.safe_div(percentages_histogram.get(item),
                                       bucket_count)
        for i in range(bucket_count):
            index = StdLib.get_rounded_double(item + i / 10)
            result[index] = bucket_value
    # Collapse everything beyond the covered range into the last bucket.
    last_bucket = StdLib.get_rounded_double((expected_buckets - 1) / 10)
    result[last_bucket] += sum_beyond_range
    return result
def dictionary_keywise_add(dict1, dict2):
    """
    Key-wise addition of two result dictionaries, with a throughput-weighted
    average for the keys listed in keys_to_average instead of a plain sum.
    A None on either side (or a key missing from dict2) yields None for that key.
    :param dict1: First result dictionary; its key set defines the output keys.
    :param dict2: Second result dictionary, expected to mirror dict1's keys.
    :return: New dictionary combining both inputs.
    """
    result = {}
    for key in dict1:
        # BUG FIX: the original only checked dict1 for None, so a None (or a
        # missing key) in dict2 raised a TypeError on the addition.
        if dict1.get(key) is None or dict2.get(key) is None:
            result[key] = None
        else:
            result[key] = dict1[key] + dict2[key]
    if 'Request_Throughput' in dict1:
        # Response times must not be summed: weight each side by its throughput
        # and divide by the combined throughput.
        keys_to_average = ['Response_Time']
        for key in keys_to_average:
            if key in dict1:
                if (dict1['Request_Throughput'] is None or dict1[key] is None
                        or dict2.get('Request_Throughput') is None
                        or dict2.get(key) is None):
                    result[key] = None
                else:
                    result[key] = StdLib.safe_div(
                        ((dict1['Request_Throughput'] * dict1[key]) +
                         (dict2['Request_Throughput'] * dict2[key])),
                        result['Request_Throughput'])
    return result
def parse_file(self, base_path, base_filename, histograms):
    """
    Main parsing method which sets the instance fields to parsed values
    :param base_path: Base path to the memtier file to parse
    :param base_filename: Base filename to the memtier file to parse (without file ending)
    :param histograms: Parse histograms, implies the parsing of the summary only and not history!
    :return: Nothing
    """
    path_interpretation = PathHelper.interpret_path(base_path)
    # Total client count = virtual clients per thread * client threads (from the path encoding).
    self.clients = int(path_interpretation['vc']) * int(
        path_interpretation['ct'])
    if histograms:
        # Histograms include the full range, therefore use the summary as a shortcut
        # Seed with a (0, 0) anchor entry so the CDF transformation has a starting point.
        set_histogram_memtier = [(0, 0)]
        get_histogram_memtier = [(0, 0)]
        regex_seconds = r"^\d+\s+Seconds"
        regex_set_results = r"^Sets"
        regex_get_results = r"^Gets"
        filename = base_path.joinpath(base_filename + '.stdout')
        regex_get_histogram_entry = r"^GET\s+"
        regex_set_histogram_entry = r"^SET\s+"
        with open(filename, "r") as file:
            for line in file:
                # Each line is classified by its prefix; the first match wins.
                if re.match(regex_seconds, line):
                    self.seconds = StdLib.get_sane_int(line.split()[0])
                    continue
                if re.match(regex_set_results, line):
                    # GET-only runs ignore the SET summary line (and vice versa below).
                    if path_interpretation['type'] != 'GET':
                        self.set_observed = MemtierParser.parse_result_line(
                            line)
                    continue
                if re.match(regex_get_results, line):
                    if path_interpretation['type'] != 'SET':
                        self.get_observed = MemtierParser.parse_result_line(
                            line)
                    continue
                if re.match(regex_get_histogram_entry, line):
                    get_histogram_memtier.append(
                        MemtierParser.parse_histogram_entry(line))
                    continue
                if re.match(regex_set_histogram_entry, line):
                    set_histogram_memtier.append(
                        MemtierParser.parse_histogram_entry(line))
                    continue
        # Convert the cumulative (CDF) entries into per-bucket percentages, then
        # re-bucket into the fixed-size 100µs layout shared by all parsers.
        memtier_set_histogram_percentage = dict(
            MemtierParser.transform_from_cdf(set_histogram_memtier))
        memtier_get_histogram_percentage = dict(
            MemtierParser.transform_from_cdf(get_histogram_memtier))
        self.set_histogram_percentage = Parser.transform_to_fixed_size_buckets(
            memtier_set_histogram_percentage)
        self.get_histogram_percentage = Parser.transform_to_fixed_size_buckets(
            memtier_get_histogram_percentage)
        # Scale percentages to absolute counts using total requests
        # (throughput * seconds); a missing throughput yields zero counts.
        if self.set_observed['Request_Throughput'] is None:
            self.set_histogram_count = Parser.percentages_to_counts(
                self.set_histogram_percentage, 0)
        else:
            self.set_histogram_count = Parser.percentages_to_counts(
                self.set_histogram_percentage,
                self.set_observed['Request_Throughput'] * self.seconds)
        if self.get_observed['Request_Throughput'] is None:
            self.get_histogram_count = Parser.percentages_to_counts(
                self.get_histogram_percentage, 0)
        else:
            self.get_histogram_count = Parser.percentages_to_counts(
                self.get_histogram_percentage,
                self.get_observed['Request_Throughput'] * self.seconds)
    else:
        # Memtier history should be used to cut off the first 10 and consume the next consecutive 60 seconds
        filename = base_path.joinpath(base_filename + '.stderr')
        memtier_history = []
        # Captures: (1) elapsed seconds, (2) ops/sec, (3) latency in msec from memtier's [RUN ...] progress lines.
        regex_history = r"\[RUN\s+#\d+\s+\d+%,\s+(\d+)\s+secs\]\s+\d+\s+threads:\s+\d+\s+ops,\s+(\d+)\s+\(avg:\s+\d+\)\s+ops/sec,\s+\d+\.\d+../sec\s+\(avg:\s+\d+\.\d+../sec\),\s+(\d+\.\d+)\s+\(avg:\s+\d+\.\d+\)\s+msec\s+latency"
        # Read in all lines
        with open(filename, "r") as file:
            for line in file:
                result = re.findall(regex_history, line)
                for second, ops, latency in result:
                    memtier_history.append(
                        (int(second), int(ops), float(latency)))
        # Extract the actual 60 second window
        high_performance_section = MemtierParser.extract_stable_window(
            memtier_history)
        seconds = len(high_performance_section)
        # Calculate the averages from extracted ops and latencies
        # (zip(*...) transposes the (second, ops, latency) tuples into columns).
        _, average_throughput, average_response_time = map(
            lambda x: x / seconds,
            [sum(x) for x in zip(*high_performance_section)])
        self.seconds = seconds
        # Store extracted values in the correct result type
        if path_interpretation['type'] == 'GET':
            self.get_observed = {
                'Request_Throughput': average_throughput,
                'Hits': None,
                'Misses': None,
                'Response_Time': average_response_time,
                'Data_Throughput': None
            }
            # Hits/misses are not in the history lines; pull them from the summary.
            t = MemtierParser.get_line_from_summary(
                base_path, base_filename)
            self.get_observed['Hits'] = t['Hits']
            self.get_observed['Misses'] = t['Misses']
        elif path_interpretation['type'] == 'SET':
            self.set_observed = {
                'Request_Throughput': average_throughput,
                'Response_Time': average_response_time,
                'Data_Throughput': None
            }
        else:
            # Assumption: Latencies stay constant between GET and SET requests. This is incorrect as such this
            # should not be used for mixed-type approaches!
            self.get_observed = {
                'Request_Throughput': average_throughput / 2,
                'Hits': None,
                'Misses': None,
                'Response_Time': average_response_time,
                'Data_Throughput': None
            }
            self.set_observed = {
                'Request_Throughput': average_throughput / 2,
                'Response_Time': average_response_time,
                'Data_Throughput': None
            }
            t = MemtierParser.get_line_from_summary(
                base_path, base_filename)
            self.get_observed['Hits'] = t['Hits']
            self.get_observed['Misses'] = t['Misses']
    # Derive the interactive-law expectations for both request types from the
    # observed values and the client count.
    self.set_interactive = Parser.interactive_law_check(
        self.set_observed, self.clients)
    self.get_interactive = Parser.interactive_law_check(
        self.get_observed, self.clients)