def generate_dataset_histogram(dataset, output_path):
    """
    Generate a bar plot of the relative frequency of each segment type in
    <dataset>.

    <dataset> is a mapping; the class label of every value is read from
    value["data"]["segment_type"]["data"]. <output_path> is handed unchanged
    to helpers.save_figure.
    """
    histogram = Counter(d_i["data"]["segment_type"]["data"]
                        for d_i in dataset.values())
    # The total does not change while normalising, so compute it only once.
    total_samples = sum(histogram.values())

    xs = [1, 2, 3, 4, 5, 6, 7]
    # Relative frequencies, ordered by sorted class label.
    # NOTE(review): the i-th sorted label is paired with xs[i], which assumes
    # that all seven classes occur in <dataset> -- confirm against callers.
    ys = [count / total_samples for _, count in sorted(histogram.items())]

    plt.xticks(xs, [1, 2, 3, 4, 5, 6, "Outlier"])
    helpers.ylabel(r"$\mathbb{P}\{x = \mathcal{X}\}$")
    helpers.xlabel("Class Label")
    helpers.plot_a_bar(xs, ys, idx=1)
    helpers.save_figure(output_path, no_legend=True)
def generate_number_of_successful_requests_bar_plot(parameter_name, parameter_value, trials):
    """
    Generate a bar plot showing the number of requests that completed
    successfully in each of the trials.

    One bar is drawn per trial, using the trial's
    "number-of-successful-flows" parameter as its height and the trial name
    (with any "-approximate" substring removed) as its tick label.
    <parameter_name> and <parameter_value> are only used to build the name of
    the output file.
    """
    bar_width = 0.2
    # Exactly one x location per trial, spaced two bar widths apart. Building
    # the locations from an integer range (instead of a float-stepped arange
    # with an extra slot) guarantees that the number of tick locations always
    # equals the number of tick labels handed to plt.xticks below.
    bar_x_locations = np.arange(len(trials)) * (2 * bar_width)
    bar_labels = [the_trial.name.replace("-approximate", "") for the_trial in trials]

    for idx, the_trial in enumerate(trials):
        print(f"generate_number_of_successful_requests_bar_plot: {idx}, {the_trial.name}")
        y_value = the_trial.get_parameter("number-of-successful-flows")
        helpers.plot_a_bar(bar_x_locations[idx], y_value,
                           label=bar_labels[idx], bar_width=bar_width, idx=idx)

    plt.xticks(bar_x_locations, bar_labels)
    helpers.save_figure(f"successful-requests-bar-{parameter_name}-{parameter_value}.pdf",
                        no_legend=True)
def _count_intervals_per_seq_num(packets, delta):
    """
    Return a dict mapping each sequence number to the number of consecutive
    intervals it spans, i.e. (last interval index - first interval index) + 1.

    <packets> must be non-empty and sorted by timestamp. A new interval is
    started at the first packet that arrives more than <delta> after the start
    of the current interval.
    """
    interval_start = packets[0].timestamp
    interval_index = 0
    first_interval = {}
    last_interval = {}
    for p_i in packets:
        if (p_i.timestamp - interval_start) > delta:
            interval_index += 1
            interval_start = p_i.timestamp
        # interval_index never decreases, so the first index recorded for a
        # sequence number is its minimum and the latest one is its maximum.
        first_interval.setdefault(p_i.seq_num, interval_index)
        last_interval[p_i.seq_num] = interval_index
    return {seq_num: (last_interval[seq_num] - first_seen) + 1
            for seq_num, first_seen in first_interval.items()}


def generate_number_of_time_periods_shares_were_active_pdf(set_of_traces_to_plot, trace_names):
    r"""
    Generate a plot of the probability density function of the number of
    \delta ms time periods that shares for a particular sequence number were
    present in the network. A single PDF is generated and plotted for each of
    the traces in <set_of_traces_to_plot>.

    <trace_names> is paired positionally with <set_of_traces_to_plot> and
    provides the legend label of each trace. Every packet in a trace must
    expose .timestamp and .seq_num. Traces without any packets are skipped.
    """
    bar_width = 0.35
    # Interval length in timestamp units (presumably microseconds, i.e.
    # 100 ms -- confirm against the code that produces the packets).
    delta = 100 * 10**3
    possible_x_values = set()

    for bar_idx, (trace_name, packets) in enumerate(zip(trace_names, set_of_traces_to_plot)):
        packets = sorted(packets, key=lambda p_i: p_i.timestamp)
        if not packets:
            # An empty trace has no first timestamp to anchor the intervals on
            # (packets[0] would raise IndexError) and has nothing to plot.
            continue

        seq_num_to_interval_count = _count_intervals_per_seq_num(packets, delta)

        # (interval count, number of sequence numbers), ordered by interval count.
        hist_data_for_trace = sorted(Counter(seq_num_to_interval_count.values()).items())
        interval_counts = [t_i[0] for t_i in hist_data_for_trace]
        occurrences = [t_i[1] for t_i in hist_data_for_trace]
        possible_x_values.update(interval_counts)

        vector_sum = sum(occurrences)
        normed_hist_data_for_trace = [o_i / vector_sum for o_i in occurrences]

        # Shift the bars of each trace by one bar width so they sit side by side.
        bar_x_locations = [x_i + (bar_width * bar_idx) for x_i in interval_counts]
        helpers.plot_a_bar(bar_x_locations, normed_hist_data_for_trace,
                           idx=bar_idx, bar_width=bar_width,
                           label=helpers.legend_font(trace_name))

    x_tick_labels = sorted(possible_x_values)
    # Offset each tick by half a bar width per additional trace, relative to
    # the position of the first trace's bar.
    x_tick_locations = [x_i + ((bar_width/2) * (len(set_of_traces_to_plot)-1))
                        for x_i in x_tick_labels]
    plt.xticks(x_tick_locations, x_tick_labels)
    # plt.xlabel(r"Number of $\delta$ms intervals sequence shares were present in the network")
    plt.ylabel(helpers.axis_label_font(r"$\mathbb{P}\{x = \mathcal{X}\}$"))
    helpers.save_figure("share-presence-pdf.pdf", num_cols=len(set_of_traces_to_plot))
def generate_active_paths_per_interval_plot(set_of_traces_to_plot, trace_names):
    r"""
    Generate a plot of the probability density function of the number of
    active paths per \delta ms interval. A path is defined as being active if
    it is carrying shares for any sequence number.

    A path is identified by the (source_port, destination_port) pair of a
    packet. <trace_names> is paired positionally with <set_of_traces_to_plot>
    and provides the legend label of each trace. Traces without any packets
    are skipped.

    Raises ValueError if no trace contains a completed interval, because the
    range of the x axis cannot be determined in that case.
    """
    bar_width = 0.4
    delta = 100 * 10**3  # microseconds
    possible_x_values = set()

    for bar_idx, (trace_name, packets) in enumerate(zip(trace_names, set_of_traces_to_plot)):
        packets = sorted(packets, key=lambda p_i: p_i.timestamp)
        if not packets:
            # An empty trace has no first timestamp to anchor the intervals on
            # (packets[0] would raise IndexError) and has nothing to plot.
            continue

        interval_start = packets[0].timestamp
        active_ports_in_interval = set()
        number_of_active_paths_per_interval = []
        for p_i in packets:
            if (p_i.timestamp - interval_start) > delta:
                # The current interval is over: record it and start a new one
                # at the packet that crossed the boundary.
                number_of_active_paths_per_interval.append(len(active_ports_in_interval))
                active_ports_in_interval = set()
                interval_start = p_i.timestamp
            active_ports_in_interval.add((p_i.source_port, p_i.destination_port))
        # NOTE(review): the interval that is still open when the trace ends is
        # never recorded. Presumably deliberate (it may be shorter than delta)
        # -- confirm.

        # (active path count, number of intervals), ordered by path count.
        hist_data_for_trace = sorted(Counter(number_of_active_paths_per_interval).items())
        path_counts = [t_i[0] for t_i in hist_data_for_trace]
        occurrences = [t_i[1] for t_i in hist_data_for_trace]
        possible_x_values.update(path_counts)

        vector_sum = sum(occurrences)
        normed_hist_data_for_trace = [o_i / vector_sum for o_i in occurrences]

        # Shift the bars of each trace by one bar width so they sit side by side.
        bar_x_locations = [x_i + (bar_width * bar_idx) for x_i in path_counts]
        helpers.plot_a_bar(bar_x_locations, normed_hist_data_for_trace,
                           idx=bar_idx, bar_width=bar_width,
                           label=helpers.legend_font(trace_name))

    if not possible_x_values:
        # Same exception type min() would raise on an empty set, but with a
        # message that names the actual problem.
        raise ValueError("no trace contains a completed interval; "
                         "cannot determine the x axis range")

    # Label every integer between the smallest and largest observed value,
    # including values that were never observed.
    x_tick_labels = np.arange(min(possible_x_values), max(possible_x_values) + 1)
    x_tick_locations = [x_i + ((bar_width/2) * (len(set_of_traces_to_plot)-1))
                        for x_i in x_tick_labels]
    plt.xticks(x_tick_locations, x_tick_labels)
    plt.ylabel(helpers.axis_label_font(r"$\mathbb{P}\{x = \mathcal{X}\}$"))
    helpers.save_figure("active-paths-histogram.pdf", num_cols=len(set_of_traces_to_plot))
def generate_per_class_accuracy_bar_plot():
    """
    Generate a figure with two stacked axes and save it as
    "per-class-error-bar-plot.pdf" below figure_output_dir.

    The second axes (ax2) shows, for each of the seven classes, the validation
    accuracy in percent derived from the module-level confusion matrices
    more_dropout ("Augmented Dataset") and undersampled ("Undersampled
    Dataset"). The first axes (ax1) shows the relative class frequencies of
    the dataset read from ./segments-undersampled.json.
    """
    augmented_conf_mat = more_dropout
    undersampled_conf_mat = undersampled
    fig, (ax1, ax2) = plt.subplots(2)

    bar_width = 0.1
    augmented_xs = np.arange(0.0, 0.3 * 6.5, 0.3)
    undersampled_xs = [x_i + bar_width for x_i in augmented_xs]
    print(augmented_xs, undersampled_xs)

    def per_class_accuracy(conf_mat):
        # Diagonal entry of each class as a percentage of the total returned by
        # get_samples_that_are for that (1-based) class.
        # NOTE(review): presumably the samples whose true class is idx + 1 --
        # confirm against get_samples_that_are.
        accuracies = []
        for idx in range(7):
            class_total = sum(get_samples_that_are(conf_mat, idx + 1))
            accuracies.append(100 * (conf_mat[idx][idx] / class_total))
        return accuracies

    augmented_ys = per_class_accuracy(augmented_conf_mat)
    undersampled_ys = per_class_accuracy(undersampled_conf_mat)

    # Centre each tick between the two bars of a class.
    tick_locations = [x_i + 0.5 * bar_width for x_i in augmented_xs]
    plt.xticks(tick_locations, [1, 2, 3, 4, 5, 6, "Outlier"])
    helpers.plot_a_bar(augmented_xs, augmented_ys, idx=0,
                       label="Augmented Dataset", bar_width=bar_width,
                       axis_to_plot_on=ax2)
    helpers.plot_a_bar(undersampled_xs, undersampled_ys, idx=1,
                       label="Undersampled Dataset", bar_width=bar_width,
                       axis_to_plot_on=ax2)
    helpers.xlabel("Class label")
    helpers.ylabel(r"Validation Accuracy (\%)", formatter=lambda x: x, ax=ax2)

    # Class distribution of the undersampled dataset, drawn on the first axes.
    raw_dataset = path.Path("./segments-undersampled.json").read_text()
    undersampled_dataset = json.loads(raw_dataset)
    histogram = Counter(
        d_i["data"]["segment_type"]["data"]
        for d_i in undersampled_dataset.values()
    )
    class_xs = list(range(1, 8))
    total_samples = sum(histogram.values())
    class_ys = [count / total_samples for _, count in sorted(histogram.items())]
    helpers.plot_a_bar(class_xs, class_ys, idx=1, axis_to_plot_on=ax1, label_data=False)
    helpers.ylabel(r"$\mathbb{P}\{x = \mathcal{X}\}$", formatter=lambda x: x, ax=ax1)
    ax1.set_yticks([0.1, 0.2, 0.3])
    ax1.grid(**cfg.GRID)
    ax1.xaxis.set_ticklabels([])

    # Work on a copy so the shared legend configuration is left untouched.
    legend_kwargs = deepcopy(cfg.LEGEND)
    legend_kwargs["bbox_to_anchor"] = (0.5, 0.975)
    fig.legend(ncol=2, **legend_kwargs)

    output_file = figure_output_dir / "per-class-error-bar-plot.pdf"
    helpers.save_figure(output_file, no_legend=True)