Example #1
    def transform_to_fixed_size_buckets(percentages_histogram,
                                        expected_buckets=200):
        """
        Returns for given histogram to a normalized histogram in term of buckets.
        :param percentages_histogram: Histogram as returned per middleware in format
        :param expected_buckets: Number of buckets expected in the histogram
        :return: Histogram which has 100µs buckets between [0, expected_buckets/10 - 0.1] ms
        """
        result = {}

        # Pre-initialize the result dictionary with all expected buckets set to 0.0
        for i in range(expected_buckets):
            result[StdLib.get_rounded_double(i / 10)] = 0.0

        sum_beyond_range = 0
        # Fill in the observed values to the dictionary
        for item in percentages_histogram:
            if item >= expected_buckets / 10:
                # Accumulate anything above the covered range (>= expected_buckets / 10 ms)
                sum_beyond_range += percentages_histogram.get(item)
                continue
            index = StdLib.get_rounded_double(item)
            result[index] = percentages_histogram.get(item)

        last_bucket = StdLib.get_rounded_double((expected_buckets - 1) / 10)
        result[last_bucket] += sum_beyond_range

        return result
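
A minimal usage sketch (hypothetical values; the StdLib stub is an assumption standing in for the project's helper, and the method is called standalone here for brevity):

    class StdLib:
        @staticmethod
        def get_rounded_double(value):
            # Assumption: the real helper rounds to one decimal place (the 100µs grid)
            return round(value, 1)

    # Hypothetical histogram: latency (ms) -> percentage of requests
    percentages = {0.3: 12.5, 1.7: 80.0, 25.0: 7.5}
    buckets = transform_to_fixed_size_buckets(percentages, expected_buckets=200)
    print(buckets[0.3])   # 12.5
    print(buckets[19.9])  # 7.5 -- 25.0 ms is beyond the range and lands in the last bucket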
Example #2
    def parse_result_line(line):
        """
        Function to parse a memtier summary line, returns any non-valid numbers with default value 0.0.
        :param line: Line to parse (guaranteed by caller to be a valid summary line).
        :return: Parsed elements as tuples of doubles.
        """
        r_type, throughput, hits, misses, response_time, data_throughput = line.split()

        throughput = StdLib.get_sane_double(throughput)
        response_time = StdLib.get_sane_double(response_time)
        data_throughput = StdLib.get_sane_double(data_throughput)
        if r_type == 'Gets':
            hits = StdLib.get_sane_double(hits)
            misses = StdLib.get_sane_double(misses)
            return {
                'Request_Throughput': throughput,
                'Hits': hits,
                'Misses': misses,
                'Response_Time': response_time,
                'Data_Throughput': data_throughput
            }
        else:
            return {
                'Request_Throughput': throughput,
                'Response_Time': response_time,
                'Data_Throughput': data_throughput
            }
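
A quick sketch with a hypothetical summary line (the StdLib stub assumes get_sane_double is a float conversion with a 0.0 fallback, as the docstring describes):

    class StdLib:
        @staticmethod
        def get_sane_double(token):
            # Assumption: parse a double, defaulting to 0.0 for non-numeric tokens
            try:
                return float(token)
            except ValueError:
                return 0.0

    line = "Gets 3000.00 2500.00 500.00 1.20 250.00"  # hypothetical memtier summary line
    print(parse_result_line(line))
    # {'Request_Throughput': 3000.0, 'Hits': 2500.0, 'Misses': 500.0,
    #  'Response_Time': 1.2, 'Data_Throughput': 250.0}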
Example #3
    def parse_histogram_entry(line):
        """
        Function to parse a memtier histogram line, returns a valid tuple.
        :param line: Line to parse (guaranteed by caller to be a valid histogram line).
        :return: Parsed elements as tuples of doubles.
        """
        _, bucket, cdf_until_and_including_bucket = line.split()

        bucket = StdLib.get_sane_double(bucket)
        cdf_until_and_including_bucket = StdLib.get_sane_double(
            cdf_until_and_including_bucket)

        return bucket, cdf_until_and_including_bucket
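
Sketch with a hypothetical histogram line (reusing the StdLib stub from above):

    line = "GET 0.180 12.51"  # hypothetical: operation, bucket (ms), cumulative percentage
    print(parse_histogram_entry(line))  # (0.18, 12.51)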
Example #4
    def normalize(self, entry, factor):
        """
        Scales the observed metrics of the given entry type by the share of
        `factor` contributed by the observed request throughput.
        """
        if entry == 'Histogram_GET' or entry == 'Histogram_SET':
            print("Not implemented")
        elif entry == 'GET':
            if self.get_observed['Request_Throughput'] is not None:
                multiplier = self.get_observed['Request_Throughput'] / factor
                for key in ['Request_Size', 'Queue_Waiting_Time',
                            'Memcached_Communication', 'Response_Time']:
                    if key in self.get_observed:
                        self.get_observed[key] *= multiplier
                if self.get_observed.get('Key_Distribution') is not None:
                    self.get_observed['Key_Distribution'] = tuple(
                        StdLib.get_sane_double(x) * multiplier
                        for x in self.get_observed['Key_Distribution'])
                self.get_interactive['Response_Time'] *= multiplier
        elif entry == 'SET':
            if self.set_observed['Request_Throughput'] is not None:
                multiplier = self.set_observed['Request_Throughput'] / factor
                for key in ['Queue_Waiting_Time', 'Memcached_Communication',
                            'Response_Time']:  # , 'Queue_Size']
                    if key in self.set_observed:
                        self.set_observed[key] *= multiplier
                self.set_interactive['Response_Time'] *= multiplier
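
The multiplier is each run's share of the total throughput, so summing the scaled values across runs yields a throughput-weighted average. A standalone sketch of that idea (hypothetical numbers, not part of the class):

    runs = [
        {'Request_Throughput': 900.0, 'Response_Time': 1.2},
        {'Request_Throughput': 1100.0, 'Response_Time': 1.0},
    ]
    total = sum(r['Request_Throughput'] for r in runs)
    weighted_rt = sum(r['Response_Time'] * r['Request_Throughput'] / total
                      for r in runs)
    print(weighted_rt)  # ~1.09 (throughput-weighted)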
Example #5
    def parse_file(base_path, ping_target):
        """
        Parses a ping output file and returns the average latency in milliseconds.
        """
        ping_history = []
        filename = base_path.joinpath(ping_target + ".ping")
        # Read in all lines
        with open(filename, "r") as file:
            for line in file:
                splits = line.split('=')
                if len(splits) > 3:
                    ping_info = splits[3].split()
                    if ping_info[1] == 'ms':
                        ping_history.append(
                            StdLib.get_sane_double(ping_info[0]))
                    elif ping_info[1] == 's':
                        ping_history.append(
                            1000 * StdLib.get_sane_double(ping_info[0]))
                    else:
                        print("Unexpected unit, got value {} {}".format(
                            ping_info[0], ping_info[1]))

        # safe_div guards against a file without valid ping lines (assuming the
        # project's helper returns a sane default on a zero denominator)
        return StdLib.safe_div(sum(ping_history), len(ping_history))
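
Sketch of the '=' splitting on a typical ping output line (hypothetical line, standard ping format assumed):

    line = "64 bytes from 10.0.0.1: icmp_seq=1 ttl=64 time=0.123 ms"
    splits = line.split('=')       # [..., '1 ttl', '64 time', '0.123 ms']
    value, unit = splits[3].split()
    print(value, unit)             # 0.123 ms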
Example #6
    def transform_to_fixed_size_buckets(percentages_histogram,
                                        expected_buckets=200):
        """
        Returns for given histogram to a normalized histogram in term of buckets.
        :param percentages_histogram: Histogram as returned per memtier in format
        :param expected_buckets: Number of buckets expected in the histogram
        :return: Histogram which has 100µs buckets between [0, expected_buckets/10 - 0.1] ms
        """
        result = {}

        # Pre-initialize the result dictionary with all expected buckets set to 0.0
        for i in range(expected_buckets):
            result[StdLib.get_rounded_double(i / 10)] = 0.0

        sum_below_range = 0
        sum_beyond_range = 0
        current_below = 0
        # Fill in the observed values to the dictionary
        for item in percentages_histogram:
            if item >= expected_buckets / 10:
                # Accumulate anything above the covered range (>= expected_buckets / 10 ms)
                sum_beyond_range += percentages_histogram.get(item)
                continue
            if item <= 1:
                # Sub-millisecond entries are finer-grained than 100µs: merge all
                # entries within the same 100µs window, flushing once the integer
                # 0.1 ms index advances (> 0.99 is effectively >= 1 on integers)
                if int(item * 10) - int(current_below * 10) > 0.99:
                    if not current_below == 0.0:
                        index = StdLib.get_rounded_double(
                            int(current_below * 10) / 10)
                        result[index] = sum_below_range
                    sum_below_range = percentages_histogram.get(item)
                    current_below = item
                    if item == 1:
                        current_below = 0
                        sum_below_range = 0
                        index = StdLib.get_rounded_double(item)
                        result[index] = percentages_histogram.get(item)
                else:
                    sum_below_range += percentages_histogram.get(item)
                continue
            # Try to expand buckets and divide respective percentages evenly -> uniform distribution inside buckets
            bucket_count = int(
                math.pow(10,
                         math.ceil(math.log10(item + 0.1)) - 1))
            bucket_value = StdLib.safe_div(percentages_histogram.get(item),
                                           bucket_count)
            for i in range(bucket_count):
                index = StdLib.get_rounded_double(item + i / 10)
                result[index] = bucket_value

        last_bucket = StdLib.get_rounded_double((expected_buckets - 1) / 10)
        result[last_bucket] += sum_beyond_range

        return result
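
The expansion rule reflects memtier's coarsening bucket widths above 1 ms: one reported entry may cover 1, 10, or 100 of the 100µs target buckets. A standalone sketch of the bucket-count computation:

    import math

    for item in [1.5, 5.0, 15.0, 150.0]:
        bucket_count = int(math.pow(10, math.ceil(math.log10(item + 0.1)) - 1))
        print(item, '->', bucket_count)
    # 1.5 -> 1, 5.0 -> 1, 15.0 -> 10, 150.0 -> 100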
Example #7
    def dictionary_keywise_add(dict1, dict2):
        """
        Adds two result dictionaries key-wise. Keys whose value is None in dict1
        stay None; Response_Time is recomputed as a throughput-weighted average
        instead of a plain sum.
        """
        result = {
            key: dict1.get(key) + dict2.get(key)
            for key in dict1 if dict1.get(key) is not None
        }
        for key in dict1:
            if dict1.get(key) is None:
                result[key] = None

        if 'Request_Throughput' in dict1:
            keys_to_average = ['Response_Time']
            for key in keys_to_average:
                if key in dict1:
                    if dict1['Request_Throughput'] is None or dict1[key] is None:
                        result[key] = None
                    else:
                        result[key] = StdLib.safe_div(
                            (dict1['Request_Throughput'] * dict1[key]) +
                            (dict2['Request_Throughput'] * dict2[key]),
                            result['Request_Throughput'])
        return result
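
Sketch with hypothetical inputs (StdLib.safe_div is assumed to be a plain division with a guard against a zero denominator):

    d1 = {'Request_Throughput': 1000.0, 'Response_Time': 2.0, 'Hits': None}
    d2 = {'Request_Throughput': 3000.0, 'Response_Time': 1.0, 'Hits': None}
    print(dictionary_keywise_add(d1, d2))
    # {'Request_Throughput': 4000.0, 'Response_Time': 1.25, 'Hits': None}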
Example #8
    def parse_file(self, base_path, base_filename, histograms):
        """
        Main parsing method which sets the instance fields to parsed values
        :param base_path: Base path to the memtier file to parse
        :param base_filename: Base filename to the memtier file to parse (without file ending)
        :param histograms: Parse histograms, implies the parsing of the summary only and not history!
        :return: Nothing
        """
        path_interpretation = PathHelper.interpret_path(base_path)
        self.clients = int(path_interpretation['vc']) * int(
            path_interpretation['ct'])
        if histograms:
            # Histograms include the full range, therefore use the summary as a shortcut
            set_histogram_memtier = [(0, 0)]
            get_histogram_memtier = [(0, 0)]

            regex_seconds = r"^\d+\s+Seconds"

            regex_set_results = r"^Sets"
            regex_get_results = r"^Gets"

            filename = base_path.joinpath(base_filename + '.stdout')

            regex_get_histogram_entry = r"^GET\s+"
            regex_set_histogram_entry = r"^SET\s+"

            with open(filename, "r") as file:
                for line in file:
                    if re.match(regex_seconds, line):
                        self.seconds = StdLib.get_sane_int(line.split()[0])
                        continue
                    if re.match(regex_set_results, line):
                        if path_interpretation['type'] != 'GET':
                            self.set_observed = MemtierParser.parse_result_line(
                                line)
                        continue
                    if re.match(regex_get_results, line):
                        if path_interpretation['type'] != 'SET':
                            self.get_observed = MemtierParser.parse_result_line(
                                line)
                        continue
                    if re.match(regex_get_histogram_entry, line):
                        get_histogram_memtier.append(
                            MemtierParser.parse_histogram_entry(line))
                        continue
                    if re.match(regex_set_histogram_entry, line):
                        set_histogram_memtier.append(
                            MemtierParser.parse_histogram_entry(line))
                        continue

            memtier_set_histogram_percentage = dict(
                MemtierParser.transform_from_cdf(set_histogram_memtier))
            memtier_get_histogram_percentage = dict(
                MemtierParser.transform_from_cdf(get_histogram_memtier))

            self.set_histogram_percentage = Parser.transform_to_fixed_size_buckets(
                memtier_set_histogram_percentage)
            self.get_histogram_percentage = Parser.transform_to_fixed_size_buckets(
                memtier_get_histogram_percentage)

            if self.set_observed['Request_Throughput'] is None:
                self.set_histogram_count = Parser.percentages_to_counts(
                    self.set_histogram_percentage, 0)
            else:
                self.set_histogram_count = Parser.percentages_to_counts(
                    self.set_histogram_percentage,
                    self.set_observed['Request_Throughput'] * self.seconds)
            if self.get_observed['Request_Throughput'] is None:
                self.get_histogram_count = Parser.percentages_to_counts(
                    self.get_histogram_percentage, 0)
            else:
                self.get_histogram_count = Parser.percentages_to_counts(
                    self.get_histogram_percentage,
                    self.get_observed['Request_Throughput'] * self.seconds)

        else:
            # Use the memtier history: cut off the first 10 seconds and consume the next 60 consecutive seconds
            filename = base_path.joinpath(base_filename + '.stderr')
            memtier_history = []

            regex_history = r"\[RUN\s+#\d+\s+\d+%,\s+(\d+)\s+secs\]\s+\d+\s+threads:\s+\d+\s+ops,\s+(\d+)\s+\(avg:\s+\d+\)\s+ops/sec,\s+\d+\.\d+../sec\s+\(avg:\s+\d+\.\d+../sec\),\s+(\d+\.\d+)\s+\(avg:\s+\d+\.\d+\)\s+msec\s+latency"

            # Read in all lines
            with open(filename, "r") as file:
                for line in file:
                    result = re.findall(regex_history, line)
                    for second, ops, latency in result:
                        memtier_history.append(
                            (int(second), int(ops), float(latency)))

            # Extract the actual 60 second window
            high_performance_section = MemtierParser.extract_stable_window(
                memtier_history)
            seconds = len(high_performance_section)

            # Calculate the averages from extracted ops and latencies
            _, average_throughput, average_response_time = map(
                lambda x: x / seconds,
                [sum(x) for x in zip(*high_performance_section)])

            self.seconds = seconds
            # Store extracted values in the correct result type
            if path_interpretation['type'] == 'GET':
                self.get_observed = {
                    'Request_Throughput': average_throughput,
                    'Hits': None,
                    'Misses': None,
                    'Response_Time': average_response_time,
                    'Data_Throughput': None
                }
                t = MemtierParser.get_line_from_summary(
                    base_path, base_filename)
                self.get_observed['Hits'] = t['Hits']
                self.get_observed['Misses'] = t['Misses']
            elif path_interpretation['type'] == 'SET':
                self.set_observed = {
                    'Request_Throughput': average_throughput,
                    'Response_Time': average_response_time,
                    'Data_Throughput': None
                }
            else:
                # Assumption: Latencies stay constant between GET and SET requests. This is incorrect as such this
                #             should not be used for mixed-type approaches!
                self.get_observed = {
                    'Request_Throughput': average_throughput / 2,
                    'Hits': None,
                    'Misses': None,
                    'Response_Time': average_response_time,
                    'Data_Throughput': None
                }
                self.set_observed = {
                    'Request_Throughput': average_throughput / 2,
                    'Response_Time': average_response_time,
                    'Data_Throughput': None
                }
                t = MemtierParser.get_line_from_summary(
                    base_path, base_filename)
                self.get_observed['Hits'] = t['Hits']
                self.get_observed['Misses'] = t['Misses']

        self.set_interactive = Parser.interactive_law_check(
            self.set_observed, self.clients)
        self.get_interactive = Parser.interactive_law_check(
            self.get_observed, self.clients)
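
For reference, the history regex applied to a hypothetical memtier progress line (the line format is assumed from memtier_benchmark's stderr output):

    import re

    regex_history = (r"\[RUN\s+#\d+\s+\d+%,\s+(\d+)\s+secs\]\s+\d+\s+threads:"
                     r"\s+\d+\s+ops,\s+(\d+)\s+\(avg:\s+\d+\)\s+ops/sec,"
                     r"\s+\d+\.\d+../sec\s+\(avg:\s+\d+\.\d+../sec\),"
                     r"\s+(\d+\.\d+)\s+\(avg:\s+\d+\.\d+\)\s+msec\s+latency")
    line = ("[RUN #1 50%,  40 secs]  2 threads:  120000 ops,"
            " 3000 (avg: 2950) ops/sec, 1.20MB/sec (avg: 1.18MB/sec),"
            " 1.35 (avg: 1.40) msec latency")
    print(re.findall(regex_history, line))  # [('40', '3000', '1.35')]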