Example #1
    def test_deep_dict_iterate(self):
        """deep_dict.iterate()"""
        data = {
            "a": {
                "aa": {
                    "aaa": 1,
                    "aab": 2
                },
                "ab": {
                    "aba": 3,
                    "abb": 4
                }
            },
            "b": {
                "ba": {
                    "baa": 5,
                    "bab": 6
                },
                "bb": {
                    "bba": 7,
                    "bbb": 8
                }
            },
        }
        expected = [
            (["a", "aa", "aaa"], 1),
            (["a", "aa", "aab"], 2),
            (["a", "ab", "aba"], 3),
            (["a", "ab", "abb"], 4),
            (["b", "ba", "baa"], 5),
            (["b", "ba", "bab"], 6),
            (["b", "bb", "bba"], 7),
            (["b", "bb", "bbb"], 8),
        ]

        self.assertEqual(deep_dict.iterate(data), expected)
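The test above pins down the contract for deep_dict.iterate: walk a nested dict depth-first and return a list of (key_path, leaf_value) pairs. A minimal sketch that would satisfy this test (the real module may differ in details):

def iterate(data, path=None):
    """Return a list of (path, value) pairs for every leaf of a nested dict."""
    path = path or []
    pairs = []
    for key, value in data.items():
        if isinstance(value, dict):
            # Recurse into sub-dicts, extending the key path
            pairs.extend(iterate(value, path + [key]))
        else:
            pairs.append((path + [key], value))
    return pairs

The expected ordering holds because modern Python dicts preserve insertion order.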
Example #2
    def compute_aggregates(self):
        """Compute aggregates (average, variance,...) of the values in self.agg_results"""
        # pylint: disable=too-many-statements
        for path, val in deep_dict.iterate(self.agg_results):
            if path[-1] == "ops_per_sec" and isinstance(val, list):
                # Compute aggregates for the ops_per_sec value over builds
                parent_obj = deep_dict.get_value(self.agg_results, path[0:-1])
                parent_obj["average"] = float(numpy.average(val))
                parent_obj["median"] = float(numpy.median(val))

                # Sanity check for zero results
                if parent_obj["average"] == 0 or parent_obj["median"] == 0:
                    print(
                        "WARNING: Zero average or median, deleting {}".format(
                            path))
                    deep_dict.del_value(self.agg_results, path[0:-1])
                    continue

                parent_obj["variance"] = float(numpy.var(val, ddof=1))
                parent_obj["variance_to_mean"] = float(
                    parent_obj["variance"]) / float(parent_obj["average"])
                parent_obj["min"] = min(val)
                parent_obj["max"] = max(val)
                parent_obj["range"] = parent_obj["max"] - parent_obj["min"]
                parent_obj["range_to_median"] = float(
                    parent_obj["range"]) / float(parent_obj["median"])
            elif path[-1] == "ops_per_sec_values" and isinstance(val, list):
                # Compute aggregates over the iterations inside each build, and pack result back
                # into an array that contains the result for each build
                try:
                    parent_obj = deep_dict.get_value(self.agg_results,
                                                     path[0:-1])
                except KeyError:
                    # parent_obj has been deleted above, just skip this
                    continue

                parent_obj["it_average"] = []  # Equal to ops_per_sec
                parent_obj["it_median"] = []
                parent_obj["it_variance"] = []
                parent_obj["it_variance_to_mean"] = []
                parent_obj["it_min"] = []
                parent_obj["it_max"] = []
                parent_obj["it_range"] = []
                parent_obj["it_range_to_median"] = []
                for per_build_iterations in val:
                    parent_obj["it_average"].append(
                        float(numpy.average(per_build_iterations)))
                    parent_obj["it_median"].append(
                        float(numpy.median(per_build_iterations)))
                    # TODO: May need a sanity check here too, to avoid division by zero below.
                    # So far we have not seen data where that would actually happen.
                    parent_obj["it_variance"].append(
                        float(numpy.var(per_build_iterations, ddof=1)))
                    parent_obj["it_variance_to_mean"].append(
                        float(numpy.var(per_build_iterations, ddof=1)) /
                        float(numpy.average(per_build_iterations)))
                    parent_obj["it_min"].append(
                        float(min(per_build_iterations)))
                    parent_obj["it_max"].append(
                        float(max(per_build_iterations)))
                    parent_obj["it_range"].append(
                        float(max(per_build_iterations)) -
                        float(min(per_build_iterations)))
                    parent_obj["it_range_to_median"].append(
                        (float(max(per_build_iterations)) -
                         float(min(per_build_iterations))) /
                        float(numpy.median(per_build_iterations)))

                # Compute aggregate iteration stats
                parent_obj["it_range_to_median_avg"] = numpy.average(
                    parent_obj["it_range_to_median"])
                parent_obj["it_range_to_median_max"] = max(
                    parent_obj["it_range_to_median"])
                parent_obj["it_range_to_median_min"] = min(
                    parent_obj["it_range_to_median"])

                # Flatten the ops_per_sec_values array and compute aggregates over all values, that
                # is, over all iterations in all builds
                flat_array = []
                for arr in val:
                    for value in arr:
                        flat_array.append(value)
                parent_obj["all_average"] = float(numpy.average(flat_array))
                parent_obj["all_median"] = float(numpy.median(flat_array))
                parent_obj["all_variance"] = float(
                    numpy.var(flat_array, ddof=1))
                parent_obj["all_variance_to_mean"] = float(
                    numpy.var(flat_array, ddof=1)) / float(
                        numpy.average(flat_array))
                parent_obj["all_min"] = float(min(flat_array))
                parent_obj["all_max"] = float(max(flat_array))
                parent_obj["all_range"] = float(max(flat_array)) - float(
                    min(flat_array))
                parent_obj["all_range_to_median"] = (
                    float(max(flat_array)) - float(min(flat_array))) / float(
                        numpy.median(flat_array))
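compute_aggregates also relies on deep_dict.get_value and deep_dict.del_value to read and delete nodes addressed by a key path. These are hedged companions to the iterate sketch above, inferred only from how they are called here (get_value raises KeyError for a missing path; del_value removes the addressed node):

def get_value(data, path):
    """Return the nested value addressed by the list of keys in path.

    Raises KeyError if any key along the path is missing.
    """
    node = data
    for key in path:
        node = node[key]
    return node


def del_value(data, path):
    """Delete the nested value addressed by path."""
    parent = get_value(data, path[:-1])
    del parent[path[-1]]

Note also that numpy.var(val, ddof=1) above computes the Bessel-corrected sample variance, dividing by n - 1 rather than n.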
Example #3
    def scatter_graphs(self):
        """Write some pyplot graphs into sub-directory"""
        # pylint: disable=too-many-locals,too-many-nested-blocks,too-many-statements
        # pylint: disable=too-many-branches
        directory = os.path.expanduser(self.config["graph_dir"])
        if not os.path.isdir(directory):
            os.makedirs(directory)

        pyplot.style.use("ggplot")

        # Each variant is a separate graph
        # Second value is whether to use logarithmic y-axis
        metrics = [("ops_per_sec_values", False)]

        # Strings used in filenames for output files
        dataset_names = ["", "--mc", "--pri", "--canary"]

        for metric, log in metrics:
            for dataset_index, dataset in enumerate(self.separate_fio_tests()):
                # Separate set of graphs for each variant
                for variant_name, variant_obj in six.iteritems(dataset):
                    # yvalues[build_index][iteration_index][test_index] = 123.456
                    # In other words, the innermost array corresponds to test_names
                    yvalues = []
                    test_names = []
                    for path, ops_per_sec_values in deep_dict.iterate(variant_obj):
                        if path[-1] == metric:
                            test_names.append(
                                path[1] + "." + str(path[2])
                            )  # test_name.thread_level
                            for build_index, build_values in enumerate(ops_per_sec_values):
                                for iteration_index, iteration_values in enumerate(build_values):

                                    while len(yvalues) <= build_index:
                                        yvalues.append([])
                                    while len(yvalues[build_index]) <= iteration_index:
                                        yvalues[build_index].append([])

                                    # This is what we're really here for
                                    value = ops_per_sec_values[build_index][iteration_index]
                                    yvalues[build_index][iteration_index].append(value)

                    axis = pyplot.subplot(111)
                    pyplot.subplots_adjust(bottom=0.4)
                    xvalues = list(range(len(test_names)))
                    # Each build gets its shade of blue
                    colors = numpy.array(list(range(len(yvalues)))) / float(len(yvalues))
                    markers = [
                        "+",
                        "x",
                        "1",
                        "2",
                        "3",
                        "4",
                        "8",
                        "s",
                        "p",
                        "*",
                        "h",
                        "H",
                        "D",
                        "d",
                        "|",
                        "_",
                    ]

                    for build_index, build_values in enumerate(yvalues):
                        for iteration_index, iteration_values in enumerate(build_values):
                            axis.scatter(
                                xvalues,
                                iteration_values,
                                marker=markers[build_index],
                                alpha=0.5,
                                edgecolors="none",
                                c=[[1 - colors[build_index], 0, colors[build_index]]],
                            )

                    axis.set_xticks(numpy.arange(len(test_names)) + 0.5)
                    axis.set_xticklabels(test_names, rotation=90)
                    axis.tick_params(axis="both", which="major", labelsize=5)
                    axis.tick_params(axis="both", which="minor", labelsize=5)
                    pyplot.title(variant_name + " : " + metric)

                    # Save to file
                    postfix = ""
                    if log:
                        postfix += "--log"
                    postfix += dataset_names[dataset_index]
                    postfix += "--scatter"

                    file_name = variant_name + "--" + metric + postfix + ".png"
                    path = os.path.join(directory, file_name)
                    pyplot.savefig(path, dpi=500, format="png")

                    # Save another version of the same graph, zooming y-axis to 20k
                    axis.set_ylim([0.0, 20000])
                    file_name = variant_name + "--" + metric + postfix + "--medium.png"
                    path = os.path.join(directory, file_name)
                    pyplot.savefig(path, dpi=500, format="png")

                    # Save another version of the same graph, zooming y-axis to 10k
                    axis.set_ylim([0.0, 10000])
                    file_name = variant_name + "--" + metric + postfix + "--small.png"
                    path = os.path.join(directory, file_name)
                    pyplot.savefig(path, dpi=500, format="png")

                    pyplot.clf()  # Reset canvas between loops
        print("Wrote scatter graphs to {}{}.".format(directory, os.sep))
Example #4
    def line_graphs(self):
        """Write some pyplot graphs into sub-directory"""
        # pylint: disable=too-many-locals,too-many-nested-blocks,too-many-statements
        # pylint: disable=too-many-branches
        directory = os.path.expanduser(self.config["graph_dir"])
        if not os.path.isdir(directory):
            os.makedirs(directory)

        pyplot.style.use("ggplot")

        # Each variant is a separate graph
        # Second value is whether to use logarithmic y-axis
        metrics = [("ops_per_sec_values", False)]

        # Strings used in filenames for output files
        dataset_names = ["", "--mc", "--pri", "--canary"]

        for metric, log in metrics:
            for dataset_index, dataset in enumerate(self.separate_fio_tests()):
                # Separate set of graphs for each variant
                for variant_name, variant_obj in six.iteritems(dataset):
                    # test_results[test_name] = [123.456, 123.111, ...]
                    # A flat per-test list containing all iterations over all builds
                    test_results = {}
                    test_names = []
                    for path, ops_per_sec_values in deep_dict.iterate(variant_obj):
                        if path[-1] == metric:
                            test_name = path[1] + "." + str(path[2])  # test_name.thread_level
                            test_names.append(test_name)
                            for build_values in ops_per_sec_values:
                                for iteration_values in build_values:
                                    if test_name not in test_results:
                                        test_results[test_name] = []
                                    # This is what we're really here for
                                    test_results[test_name].append(iteration_values)

                    markers = [
                        "+",
                        "x",
                        "1",
                        "2",
                        "3",
                        "4",
                        "8",
                        "s",
                        "p",
                        "*",
                        "h",
                        "H",
                        "D",
                        "d",
                        "|",
                        "_",
                    ]
                    marker_index = 0
                    axis = pyplot.subplot(111)
                    pyplot.subplots_adjust(bottom=0.0)
                    if log:
                        axis.set_yscale("log")
                    for test_name, test_result_array in six.iteritems(test_results):
                        axis.plot(
                            test_result_array,
                            label=test_name,
                            marker=markers[marker_index],
                            markersize=4,
                        )
                        marker_index += 1
                        marker_index = marker_index % len(markers)
                    # WARNING! The legend() function seems broken. The colors and markers
                    # in the legend don't actually map to the right test names.
                    # axis.legend(test_names, loc='upper left', bbox_to_anchor=(-0.15, -0.07),
                    # ncol=4, fontsize='xx-small')

                    pyplot.title(variant_name + " : " + metric)

                    # Save to file
                    postfix = ""
                    if log:
                        postfix += "--log"
                    postfix += dataset_names[dataset_index]
                    postfix += "--line"

                    file_name = variant_name + "--" + metric + postfix + ".png"
                    path = os.path.join(directory, file_name)
                    pyplot.savefig(path, dpi=500, format="png")

                    # Save another version of the same graph, zooming y-axis to 200k
                    axis.set_ylim([0.0, 200000])
                    file_name = variant_name + "--" + metric + postfix + "--medium.png"
                    path = os.path.join(directory, file_name)
                    pyplot.savefig(path, dpi=500, format="png")

                    # Save another version of the same graph, zooming y-axis to 50k
                    axis.set_ylim([0.0, 50000])
                    file_name = variant_name + "--" + metric + postfix + "--small.png"
                    path = os.path.join(directory, file_name)
                    pyplot.savefig(path, dpi=500, format="png")

                    # Save another version of the same graph, zooming y-axis to 10k
                    axis.set_ylim([0.0, 10000])
                    file_name = variant_name + "--" + metric + postfix + "--xsmall.png"
                    path = os.path.join(directory, file_name)
                    pyplot.savefig(path, dpi=500, format="png")

                    # Save another version of the same graph, zooming y-axis to 250
                    axis.set_ylim([0.0, 250])
                    file_name = variant_name + "--" + metric + postfix + "--xxsmall.png"
                    path = os.path.join(directory, file_name)
                    pyplot.savefig(path, dpi=500, format="png")

                    pyplot.clf()  # Reset canvas between loops
        print("Wrote scatter graphs to {}{}.".format(directory, os.sep))
Example #5
    def bar_graphs(self):
        """Write some pyplot graphs into sub-directory"""
        # pylint: disable=too-many-locals,too-many-nested-blocks,too-many-statements
        directory = os.path.expanduser(self.config["graph_dir"])
        if not os.path.isdir(directory):
            os.makedirs(directory)

        pyplot.style.use("ggplot")

        # Each variant is a separate graph
        # Second value is whether to use logarithmic y-axis
        metrics = [
            ("variance_to_mean", False),
            ("range_to_median", False),
            ("average", False),
            ("max", False),
            ("all_variance_to_mean", False),
            ("all_range_to_median", False),
            ("all_average", False),
            ("all_max", False),
        ]

        # Strings used in filenames for output files
        dataset_names = ["", "--mc", "--pri", "--canary"]

        for metric, log in metrics:
            for dataset_index, dataset in enumerate(self.separate_fio_tests()):
                # Separate set of graphs for each variant
                for variant_name, variant_obj in six.iteritems(dataset):
                    # Get variance for each test
                    yvalues = []
                    yvalues_median = []
                    yvalues_min = []
                    test_names = []
                    for path, val in deep_dict.iterate(variant_obj):
                        if path[-1] == metric:
                            yvalues.append(val)
                            test_names.append(
                                path[1] + "." + str(path[2])
                            )  # test_name.thread_level
                            if metric == "max":
                                # For the 'max' graph we actually print a stacked bar chart with
                                # min-median-max
                                median_key = [path[0], path[1], path[2], "median"]
                                median_val = deep_dict.get_value(variant_obj, median_key)
                                yvalues_median.append(median_val)
                                min_key = [path[0], path[1], path[2], "min"]
                                min_val = deep_dict.get_value(variant_obj, min_key)
                                yvalues_min.append(min_val)
                            if metric == "all_max":
                                # For the 'all_max' graph we also print a stacked bar chart with
                                # min-median-max
                                median_key = [path[0], path[1], path[2], "all_median"]
                                median_val = deep_dict.get_value(variant_obj, median_key)
                                yvalues_median.append(median_val)
                                min_key = [path[0], path[1], path[2], "all_min"]
                                min_val = deep_dict.get_value(variant_obj, min_key)
                                yvalues_min.append(min_val)

                    axis = pyplot.subplot(111)
                    pyplot.subplots_adjust(bottom=0.4)
                    width = 0.8
                    xvalues = list(range(len(test_names)))

                    axis.bar(xvalues, yvalues, width=width, log=log)
                    if metric in ["max", "all_max"]:
                        # pyplot draws these bars on top of each other rather than stacking them,
                        # so start with the max value and overlay the median and min on top of it.
                        axis.bar(xvalues, yvalues_median, width=width, color="#0055ff", log=log)
                        axis.bar(xvalues, yvalues_min, width=width, color="#0000ff", log=log)

                    axis.set_xticks(numpy.arange(len(test_names)) + width / 2)
                    axis.set_xticklabels(test_names, rotation=90)
                    axis.tick_params(axis="both", which="major", labelsize=4)
                    axis.tick_params(axis="both", which="minor", labelsize=4)
                    pyplot.title(variant_name + " : " + metric)

                    # Save to file
                    postfix = ""
                    if log:
                        postfix += "--log"
                    postfix += dataset_names[dataset_index]

                    file_name = variant_name + "--" + metric + postfix + ".png"
                    path = os.path.join(directory, file_name)
                    pyplot.savefig(path, dpi=500, format="png")

                    if metric[-3:] == "max":
                        # Save another version of the same graph, zooming y-axis to 200k
                        axis.set_ylim([0.0, 200000])
                        file_name = variant_name + "--" + metric + postfix + "--medium.png"
                        path = os.path.join(directory, file_name)
                        pyplot.savefig(path, dpi=500, format="png")
                    if metric[-15:] == "range_to_median":
                        # Save another version of the same graph, zooming y-axis to 0.3
                        axis.set_ylim([0.0, 0.3])
                        file_name = variant_name + "--" + metric + postfix + "--medium.png"
                        path = os.path.join(directory, file_name)
                        pyplot.savefig(path, dpi=500, format="png")

                    pyplot.clf()  # Reset canvas between loops
        print("Wrote bar graphs to {}{}.".format(directory, os.sep))