def test_deep_dict_iterate(self):
    """deep_dict.iterate()"""
    # Three-level nested dict whose leaves are numbered 1..8 in
    # depth-first order of their key paths.
    data = {
        "a": {"aa": {"aaa": 1, "aab": 2}, "ab": {"aba": 3, "abb": 4}},
        "b": {"ba": {"baa": 5, "bab": 6}, "bb": {"bba": 7, "bbb": 8}},
    }
    # Each leaf key encodes its own path: "aab" lives under "a" -> "aa".
    # iterate() must return (key_path, leaf_value) pairs in depth-first
    # key order.
    leaf_keys = ["aaa", "aab", "aba", "abb", "baa", "bab", "bba", "bbb"]
    expected = [([key[0], key[0:2], key], index + 1)
                for index, key in enumerate(leaf_keys)]
    self.assertEqual(deep_dict.iterate(data), expected)
def compute_aggregates(self):
    """Compute aggregates (average, variance, ...) of the values in self.agg_results.

    Walks self.agg_results with deep_dict.iterate() and, for each leaf:

    * "ops_per_sec" (one value per build): stores average, median, variance,
      variance_to_mean, min, max, range and range_to_median on the parent
      object. Entries whose average or median is zero are deleted outright.
    * "ops_per_sec_values" (list of per-iteration lists, one per build):
      stores per-build iteration aggregates ("it_*" arrays, one entry per
      build) plus aggregates over all iterations of all builds ("all_*").
    """
    for path, val in deep_dict.iterate(self.agg_results):
        if path[-1] == "ops_per_sec" and isinstance(val, list):
            # Compute aggregates for the ops_per_sec value over builds
            parent_obj = deep_dict.get_value(self.agg_results, path[0:-1])
            parent_obj["average"] = float(numpy.average(val))
            parent_obj["median"] = float(numpy.median(val))
            # Sanity check for zero results: they would divide by zero
            # below and are meaningless anyway, so drop the whole entry.
            if parent_obj["average"] == 0 or parent_obj["median"] == 0:
                print(
                    "WARNING: Zero average or median, deleting {}".format(
                        path))
                deep_dict.del_value(self.agg_results, path[0:-1])
                continue
            parent_obj["variance"] = float(numpy.var(val, ddof=1))
            parent_obj["variance_to_mean"] = (
                parent_obj["variance"] / parent_obj["average"])
            parent_obj["min"] = min(val)
            parent_obj["max"] = max(val)
            parent_obj["range"] = parent_obj["max"] - parent_obj["min"]
            parent_obj["range_to_median"] = (
                float(parent_obj["range"]) / parent_obj["median"])
        elif path[-1] == "ops_per_sec_values" and isinstance(val, list):
            # Compute aggregates over the iterations inside each build, and
            # pack the result back into an array with one entry per build.
            try:
                parent_obj = deep_dict.get_value(self.agg_results, path[0:-1])
            except KeyError:
                # parent_obj has been deleted above (zero sanity check),
                # just skip this
                continue
            self._compute_iteration_aggregates(parent_obj, val)
            self._compute_overall_aggregates(parent_obj, val)

@staticmethod
def _compute_iteration_aggregates(parent_obj, val):
    """Store per-build iteration aggregates ("it_*" arrays) on parent_obj."""
    for key in ("it_average", "it_median", "it_variance",
                "it_variance_to_mean", "it_min", "it_max", "it_range",
                "it_range_to_median"):
        parent_obj[key] = []
    for per_build_iterations in val:
        # Hoist the per-build statistics so each is computed exactly once.
        avg = float(numpy.average(per_build_iterations))
        med = float(numpy.median(per_build_iterations))
        # TODO: May need to add sanity check here too to avoid division
        # with zero below. For now, never had data where that would
        # actually happen.
        var = float(numpy.var(per_build_iterations, ddof=1))
        low = float(min(per_build_iterations))
        high = float(max(per_build_iterations))
        parent_obj["it_average"].append(avg)  # Equal to ops_per_sec
        parent_obj["it_median"].append(med)
        parent_obj["it_variance"].append(var)
        parent_obj["it_variance_to_mean"].append(var / avg)
        parent_obj["it_min"].append(low)
        parent_obj["it_max"].append(high)
        parent_obj["it_range"].append(high - low)
        parent_obj["it_range_to_median"].append((high - low) / med)
    # Compute aggregate iteration stats
    parent_obj["it_range_to_median_avg"] = numpy.average(
        parent_obj["it_range_to_median"])
    parent_obj["it_range_to_median_max"] = max(
        parent_obj["it_range_to_median"])
    parent_obj["it_range_to_median_min"] = min(
        parent_obj["it_range_to_median"])

@staticmethod
def _compute_overall_aggregates(parent_obj, val):
    """Store aggregates over all iterations in all builds ("all_*")."""
    # Flatten the ops_per_sec_values array and compute aggregates over all
    # values, that is, over all iterations in all builds.
    flat_array = [value for arr in val for value in arr]
    avg = float(numpy.average(flat_array))
    med = float(numpy.median(flat_array))
    var = float(numpy.var(flat_array, ddof=1))
    low = float(min(flat_array))
    high = float(max(flat_array))
    parent_obj["all_average"] = avg
    parent_obj["all_median"] = med
    parent_obj["all_variance"] = var
    parent_obj["all_variance_to_mean"] = var / avg
    parent_obj["all_min"] = low
    parent_obj["all_max"] = high
    parent_obj["all_range"] = high - low
    parent_obj["all_range_to_median"] = (high - low) / med
def scatter_graphs(self):
    """Write some pyplot graphs into sub-directory.

    For each variant in each dataset, draws one scatter plot of
    ops_per_sec_values: x = test index, y = iteration value, one marker
    shape per build and one shade of blue per build. Each graph is saved
    three times: full scale, y zoomed to 20k, and y zoomed to 10k.
    """
    # pylint: disable=too-many-locals,too-many-nested-blocks,too-many-statements
    # pylint: disable=too-many-branches
    directory = os.path.expanduser(self.config["graph_dir"])
    if not os.path.isdir(directory):
        os.makedirs(directory)
    pyplot.style.use("ggplot")
    # Each variant is a separate graph
    # Second value is whether to use logarithmic y-axis
    metrics = [("ops_per_sec_values", False)]
    # Strings used in filenames for output files
    # NOTE(review): presumably indexed in step with self.separate_fio_tests()
    # output order — verify against that method.
    dataset_names = ["", "--mc", "--pri", "--canary"]
    for metric, log in metrics:
        dataset_index = -1
        for dataset in self.separate_fio_tests():
            dataset_index += 1
            # Separate set of graphs for each variant
            for variant_name, variant_obj in six.iteritems(dataset):
                # yvalues[build_index][iteration_index][test_index] = 123.456
                # In other words, the innermost array corresponds to test_names
                yvalues = []
                test_names = []
                for path, ops_per_sec_values in deep_dict.iterate(variant_obj):
                    if path[-1] == metric:
                        test_names.append(
                            path[1] + "." + str(path[2])
                        )  # test_name.thread_level
                        for build_index, build_values in enumerate(ops_per_sec_values):
                            for iteration_index, iteration_values in enumerate(build_values):
                                # Grow the ragged yvalues structure on demand so
                                # indexing below is always in range.
                                while len(yvalues) <= build_index:
                                    yvalues.append([])
                                while len(yvalues[build_index]) <= iteration_index:
                                    yvalues[build_index].append([])
                                # This is what we're really here for
                                value = ops_per_sec_values[build_index][iteration_index]
                                yvalues[build_index][iteration_index].append(value)
                axis = pyplot.subplot(111)
                pyplot.subplots_adjust(bottom=0.4)
                xvalues = list(range(len(test_names)))
                # Each build gets its shade of blue
                colors = numpy.array(list(range(len(yvalues)))) / float(len(yvalues))
                # One marker shape per build (cycles through at most 16 builds)
                markers = [
                    "+", "x", "1", "2", "3", "4", "8", "s",
                    "p", "*", "h", "H", "D", "d", "|", "_",
                ]
                for build_index, build_values in enumerate(yvalues):
                    for iteration_index, iteration_values in enumerate(build_values):
                        axis.scatter(
                            xvalues,
                            iteration_values,
                            marker=markers[build_index],
                            alpha=0.5,
                            edgecolors="none",
                            # Blend from red (early builds) to blue (late builds)
                            c=[[1 - colors[build_index], 0, colors[build_index]]],
                        )
                axis.set_xticks(numpy.arange(len(test_names)) + 0.5)
                axis.set_xticklabels(test_names, rotation=90)
                axis.tick_params(axis="both", which="major", labelsize=5)
                axis.tick_params(axis="both", which="minor", labelsize=5)
                pyplot.title(variant_name + " : " + metric)
                # Save to file
                postfix = ""
                if log:
                    postfix += "--log"
                postfix += dataset_names[dataset_index]
                postfix += "--scatter"
                file_name = variant_name + "--" + metric + postfix + ".png"
                path = os.path.join(directory, file_name)
                pyplot.savefig(path, dpi=500, format="png")
                # Save another version of the same graph, zooming y-axis to 20k
                axis.set_ylim([0.0, 20000])
                file_name = variant_name + "--" + metric + postfix + "--medium.png"
                path = os.path.join(directory, file_name)
                pyplot.savefig(path, dpi=500, format="png")
                # Save another version of the same graph, zooming y-axis to 10k
                axis.set_ylim([0.0, 10000])
                file_name = variant_name + "--" + metric + postfix + "--small.png"
                path = os.path.join(directory, file_name)
                pyplot.savefig(path, dpi=500, format="png")
                pyplot.clf()  # Reset canvas between loops
    print("Wrote scatter graphs to {}{}.".format(directory, os.sep))
def line_graphs(self):
    """Write some pyplot graphs into sub-directory.

    For each variant in each dataset, plots one line per test: all
    iteration values over all builds, concatenated along the x-axis.
    Each graph is saved five times with progressively smaller y-axis
    limits (full, 200k, 50k, 10k, 250).
    """
    # pylint: disable=too-many-locals,too-many-nested-blocks,too-many-statements
    # pylint: disable=too-many-branches
    directory = os.path.expanduser(self.config["graph_dir"])
    if not os.path.isdir(directory):
        os.makedirs(directory)
    pyplot.style.use("ggplot")
    # Each variant is a separate graph
    # Second value is whether to use logarithmic y-axis
    metrics = [("ops_per_sec_values", False)]
    # Strings used in filenames for output files
    dataset_names = ["", "--mc", "--pri", "--canary"]
    for metric, log in metrics:
        dataset_index = -1
        for dataset in self.separate_fio_tests():
            dataset_index += 1
            # Separate set of graphs for each variant
            for variant_name, variant_obj in six.iteritems(dataset):
                # test_results[test_name] = [123.456, 123.111, ...]
                # Is a flat per-test array, containing all test_iterations over all builds
                test_results = {}
                test_names = []
                for path, ops_per_sec_values in deep_dict.iterate(variant_obj):
                    if path[-1] == metric:
                        test_name = path[1] + "." + str(path[2])  # test_name.thread_level
                        test_names.append(test_name)
                        for build_values in ops_per_sec_values:
                            for iteration_value in build_values:
                                # This is what we're really here for
                                test_results.setdefault(test_name, []).append(
                                    iteration_value)
                # One marker shape per test (cycles through at most 16)
                markers = [
                    "+", "x", "1", "2", "3", "4", "8", "s",
                    "p", "*", "h", "H", "D", "d", "|", "_",
                ]
                marker_index = 0
                axis = pyplot.subplot(111)
                pyplot.subplots_adjust(bottom=0.0)
                if log:
                    axis.set_yscale("log")
                for test_name, test_result_array in six.iteritems(test_results):
                    axis.plot(
                        test_result_array,
                        label=test_name,
                        marker=markers[marker_index],
                        markersize=4,
                    )
                    marker_index = (marker_index + 1) % len(markers)
                # WARNING! The legend() function seems broken. The colors and markers
                # in the legend don't actually map to the right test names.
                # axis.legend(test_names, loc='upper left', bbox_to_anchor=(-0.15, -0.07),
                #             ncol=4, fontsize='xx-small')
                pyplot.title(variant_name + " : " + metric)
                # Save to file
                postfix = ""
                if log:
                    postfix += "--log"
                postfix += dataset_names[dataset_index]
                postfix += "--line"
                file_name = variant_name + "--" + metric + postfix + ".png"
                pyplot.savefig(os.path.join(directory, file_name),
                               dpi=500, format="png")
                # Save more versions of the same graph, zooming the y-axis
                # in further each time (200k, 50k, 10k, 250).
                for y_limit, size_name in ((200000, "medium"), (50000, "small"),
                                           (10000, "xsmall"), (250, "xxsmall")):
                    axis.set_ylim([0.0, y_limit])
                    file_name = (variant_name + "--" + metric + postfix
                                 + "--" + size_name + ".png")
                    pyplot.savefig(os.path.join(directory, file_name),
                                   dpi=500, format="png")
                pyplot.clf()  # Reset canvas between loops
    # Fixed: this message previously claimed "scatter graphs"
    print("Wrote line graphs to {}{}.".format(directory, os.sep))
def bar_graphs(self):
    """Write some pyplot graphs into sub-directory.

    One bar graph per (metric, dataset, variant), with one bar per test.
    For the "max"/"all_max" metrics, min and median bars are overdrawn on
    top of the max bar to fake a stacked min-median-max chart. "max" and
    "range_to_median" graphs are additionally saved with a zoomed y-axis.
    """
    # pylint: disable=too-many-locals,too-many-nested-blocks,too-many-statements
    directory = os.path.expanduser(self.config["graph_dir"])
    if not os.path.isdir(directory):
        os.makedirs(directory)
    pyplot.style.use("ggplot")
    # Each variant is a separate graph
    # Second value is whether to use logarithmic y-axis
    metrics = [
        ("variance_to_mean", False),
        ("range_to_median", False),
        ("average", False),
        ("max", False),
        ("all_variance_to_mean", False),
        ("all_range_to_median", False),
        ("all_average", False),
        ("all_max", False),
    ]
    # Strings used in filenames for output files
    dataset_names = ["", "--mc", "--pri", "--canary"]
    for metric, log in metrics:
        dataset_index = -1
        for dataset in self.separate_fio_tests():
            dataset_index += 1
            # Separate set of graphs for each variant
            for variant_name, variant_obj in six.iteritems(dataset):
                # Get the metric value for each test
                yvalues = []
                yvalues_median = []
                yvalues_min = []
                test_names = []
                for path, val in deep_dict.iterate(variant_obj):
                    if path[-1] == metric:
                        yvalues.append(val)
                        test_names.append(
                            path[1] + "." + str(path[2])
                        )  # test_name.thread_level
                        if metric in ("max", "all_max"):
                            # For the 'max' graphs we actually print a stacked
                            # bar chart with min-median-max. The sibling keys
                            # are "median"/"min" for max and
                            # "all_median"/"all_min" for all_max.
                            prefix = "all_" if metric == "all_max" else ""
                            median_key = [path[0], path[1], path[2],
                                          prefix + "median"]
                            yvalues_median.append(
                                deep_dict.get_value(variant_obj, median_key))
                            min_key = [path[0], path[1], path[2],
                                       prefix + "min"]
                            yvalues_min.append(
                                deep_dict.get_value(variant_obj, min_key))
                axis = pyplot.subplot(111)
                pyplot.subplots_adjust(bottom=0.4)
                width = 0.8
                xvalues = list(range(len(test_names)))
                axis.bar(xvalues, yvalues, width=width, log=log)
                if metric in ["max", "all_max"]:
                    # pyplot is stupid and just draws these on top of each other.
                    # So one must start with the max value and go downward from there.
                    axis.bar(xvalues, yvalues_median, width=width,
                             color="#0055ff", log=log)
                    axis.bar(xvalues, yvalues_min, width=width,
                             color="#0000ff", log=log)
                axis.set_xticks(numpy.arange(len(test_names)) + width / 2)
                axis.set_xticklabels(test_names, rotation=90)
                axis.tick_params(axis="both", which="major", labelsize=4)
                axis.tick_params(axis="both", which="minor", labelsize=4)
                pyplot.title(variant_name + " : " + metric)
                # Save to file
                postfix = ""
                if log:
                    postfix += "--log"
                postfix += dataset_names[dataset_index]
                file_name = variant_name + "--" + metric + postfix + ".png"
                pyplot.savefig(os.path.join(directory, file_name),
                               dpi=500, format="png")
                if metric.endswith("max"):
                    # Save another version of the same graph, zooming y-axis to 200k
                    axis.set_ylim([0.0, 200000])
                    file_name = (variant_name + "--" + metric + postfix
                                 + "--medium.png")
                    pyplot.savefig(os.path.join(directory, file_name),
                                   dpi=500, format="png")
                if metric.endswith("range_to_median"):
                    # Save another version of the same graph, zooming y-axis to 0.3
                    axis.set_ylim([0.0, 0.3])
                    file_name = (variant_name + "--" + metric + postfix
                                 + "--medium.png")
                    pyplot.savefig(os.path.join(directory, file_name),
                                   dpi=500, format="png")
                pyplot.clf()  # Reset canvas between loops
    print("Wrote bar graphs to {}{}.".format(directory, os.sep))