def generate_dataset_histogram(dataset, output_path):
    """
    Plot the fraction of dataset entries belonging to each segment class and save the
    figure to <output_path>.

    The class of every value in <dataset> is read from
    d_i["data"]["segment_type"]["data"]. The per-class fractions are drawn in sorted class
    order at the x positions 1..7, which are labelled 1-6 and "Outlier".
    """
    histogram = Counter(
        d_i["data"]["segment_type"]["data"] for d_i in dataset.values())
    # The total is the same for every class, so compute it once rather than once per bar.
    total_samples = sum(histogram.values())
    # NOTE(review): the seven fixed positions assume that all seven classes occur in
    # <dataset>; ys holds one entry per class that is actually present.
    xs = [1, 2, 3, 4, 5, 6, 7]
    ys = [count / total_samples for _, count in sorted(histogram.items())]
    plt.xticks(xs, [1, 2, 3, 4, 5, 6, "Outlier"])
    helpers.ylabel(r"$\mathbb{P}\{x = \mathcal{X}\}$")
    helpers.xlabel("Class Label")

    helpers.plot_a_bar(xs, ys, idx=1)
    helpers.save_figure(output_path, no_legend=True)
Example #2
0
def generate_number_of_successful_requests_bar_plot(parameter_name, parameter_value, trials):
    """
    Generate a bar plot showing the number of requests that completed successfully in each of
    the trials.

    One bar is drawn per entry of <trials>; its height is the trial's
    "number-of-successful-flows" parameter and its tick label is the trial name with
    "-approximate" removed. The figure is saved as
    "successful-requests-bar-<parameter_name>-<parameter_value>.pdf".
    """
    bar_width = 0.2
    # Exactly one x location per trial, spaced two bar widths apart. Scaling an integer range
    # keeps the number of locations equal to the number of labels passed to plt.xticks; a
    # float np.arange with a computed end point yielded at least one location too many.
    bar_x_locations = 2 * bar_width * np.arange(len(trials))
    bar_labels = [the_trial.name.replace("-approximate", "") for the_trial in trials]
    for idx, the_trial in enumerate(trials):
        print(f"generate_number_of_successful_requests_bar_plot: {idx}, {the_trial.name}")
        y_value = the_trial.get_parameter("number-of-successful-flows")
        helpers.plot_a_bar(bar_x_locations[idx], y_value, label=bar_labels[idx],
                bar_width=bar_width, idx=idx)
    plt.xticks(bar_x_locations, bar_labels)
    helpers.save_figure(f"successful-requests-bar-{parameter_name}-{parameter_value}.pdf", no_legend=True)
Example #3
0
def generate_number_of_time_periods_shares_were_active_pdf(set_of_traces_to_plot, trace_names):
    r"""
    Generate a plot of the probability density function of the number of \delta ms time
    periods that shares for a particular sequence number were present in the network. A single
    PDF is generated and plotted for each of the traces in <set_of_traces_to_plot>.

    <set_of_traces_to_plot> holds one collection of packets per trace; every packet must
    expose .timestamp and .seq_num. <trace_names> supplies the legend label of the trace at
    the same position. Traces without packets are skipped. The figure is saved as
    "share-presence-pdf.pdf".
    """
    bar_width = 0.35
    # Interval length, identical for every trace.
    # NOTE(review): presumably microseconds (i.e. 100 ms) -- confirm against the timestamp unit.
    delta = 100 * 10**3
    possible_x_values = set()
    for bar_idx, (trace_name, packets) in enumerate(zip(trace_names, set_of_traces_to_plot)):
        packets = sorted(packets, key=lambda p_i: p_i.timestamp)
        if not packets:
            # There is no first packet to anchor the intervals on and nothing to draw, so
            # skip the trace instead of failing on packets[0].
            continue

        interval_start = packets[0].timestamp
        interval_index = 0
        seq_num_to_list_of_intervals = defaultdict(list)
        for p_i in packets:
            # A new interval starts at the first packet that arrives more than <delta> after
            # the start of the current interval.
            if (p_i.timestamp - interval_start) > delta:
                interval_index += 1
                interval_start = p_i.timestamp
            seq_num_to_list_of_intervals[p_i.seq_num].append(interval_index)

        # Number of intervals spanned between the first and the last sighting of each
        # sequence number (inclusive).
        seq_num_to_interval_count = {
            seq_num: (max(list_of_intervals) - min(list_of_intervals)) + 1
            for seq_num, list_of_intervals in seq_num_to_list_of_intervals.items()}

        # (interval count, number of sequence numbers) pairs ordered by interval count.
        hist_data_for_trace = sorted(Counter(seq_num_to_interval_count.values()).items())
        possible_x_values |= {t_i[0] for t_i in hist_data_for_trace}
        vector_sum = sum(t_i[1] for t_i in hist_data_for_trace)
        normed_hist_data_for_trace = [t_i[1] / vector_sum for t_i in hist_data_for_trace]

        # Offset each trace by one bar width so the bars of different traces sit side by side.
        bar_x_locations = [t_i[0] + (bar_width * bar_idx) for t_i in hist_data_for_trace]
        helpers.plot_a_bar(bar_x_locations, normed_hist_data_for_trace,
                idx=bar_idx, bar_width=bar_width, label=helpers.legend_font(trace_name))

    x_tick_labels = sorted(possible_x_values)
    # Shift every tick to the middle of the group of per-trace bars drawn for that value.
    x_tick_locations = [x_i + ((bar_width/2) * (len(set_of_traces_to_plot)-1))
            for x_i in x_tick_labels]
    plt.xticks(x_tick_locations, x_tick_labels)
    # plt.xlabel(r"Number of $\delta$ms intervals sequence shares were present in the network")
    plt.ylabel(helpers.axis_label_font(r"$\mathbb{P}\{x = \mathcal{X}\}$"))
    helpers.save_figure("share-presence-pdf.pdf", num_cols=len(set_of_traces_to_plot))
Example #4
0
def generate_active_paths_per_interval_plot(set_of_traces_to_plot, trace_names):
    r"""
    Generate a plot of the probability density function of the number of active paths
    per \delta ms interval. A path is defined as being active if it is carrying
    shares for any sequence number.

    <set_of_traces_to_plot> holds one collection of packets per trace; every packet must
    expose .timestamp, .source_port and .destination_port, and a path is identified by its
    (source_port, destination_port) pair. <trace_names> supplies the legend label of the
    trace at the same position. Traces without packets are skipped. The figure is saved as
    "active-paths-histogram.pdf".
    """
    bar_width = 0.4
    delta = 100 * 10**3 # microseconds
    possible_x_values = set()
    for bar_idx, (trace_name, packets) in enumerate(zip(trace_names, set_of_traces_to_plot)):
        packets = sorted(packets, key=lambda p_i: p_i.timestamp)
        if not packets:
            # There is no first packet to anchor the intervals on and nothing to draw, so
            # skip the trace instead of failing on packets[0].
            continue

        interval_start = packets[0].timestamp
        active_ports_in_interval = set()
        number_of_active_paths_per_interval = []
        for p_i in packets:
            # The first packet arriving more than <delta> after the interval start closes
            # the current interval and opens the next one.
            if (p_i.timestamp - interval_start) > delta:
                number_of_active_paths_per_interval.append(len(active_ports_in_interval))
                active_ports_in_interval = set()
                interval_start = p_i.timestamp

            active_ports_in_interval.add((p_i.source_port, p_i.destination_port))
        # NOTE(review): the paths gathered after the last interval boundary are never
        # appended, so the final (possibly partial) interval is not counted -- confirm
        # that this is intended.

        # (active path count, number of intervals) pairs ordered by path count.
        hist_data_for_trace = sorted(Counter(number_of_active_paths_per_interval).items())
        possible_x_values |= {t_i[0] for t_i in hist_data_for_trace}
        vector_sum = sum(t_i[1] for t_i in hist_data_for_trace)
        normed_hist_data_for_trace = [t_i[1] / vector_sum for t_i in hist_data_for_trace]

        # Offset each trace by one bar width so the bars of different traces sit side by side.
        bar_x_locations = [t_i[0] + (bar_width * bar_idx) for t_i in hist_data_for_trace]
        helpers.plot_a_bar(bar_x_locations, normed_hist_data_for_trace,
                idx=bar_idx, bar_width=bar_width, label=helpers.legend_font(trace_name))

    # When no trace completed an interval there are no x values; min()/max() would raise on
    # the empty set, so the ticks are left untouched in that case.
    if possible_x_values:
        # x_tick_labels       = list(sorted(possible_x_values))
        x_tick_labels = np.arange(min(possible_x_values), max(possible_x_values) + 1)
        # Shift every tick to the middle of the group of per-trace bars drawn for that value.
        x_tick_locations = [x_i + ((bar_width/2) * (len(set_of_traces_to_plot)-1))
                for x_i in x_tick_labels]
        plt.xticks(x_tick_locations, x_tick_labels)
    plt.ylabel(helpers.axis_label_font(r"$\mathbb{P}\{x = \mathcal{X}\}$"))
    helpers.save_figure("active-paths-histogram.pdf", num_cols=len(set_of_traces_to_plot))
def generate_per_class_accuracy_bar_plot():
    """
    Draw a two-panel figure and save it as "per-class-error-bar-plot.pdf" inside
    figure_output_dir.

    The lower panel (ax2) shows paired bars with the per-class validation accuracy, in
    percent, computed from the more_dropout ("Augmented Dataset") and undersampled
    ("Undersampled Dataset") confusion matrices. The upper panel (ax1) shows the fraction of
    samples per class found in "./segments-undersampled.json".
    """
    augmented_conf_mat, undersampled_conf_mat = more_dropout, undersampled

    def percent_correct(conf_mat):
        # Diagonal entry divided by the summed result of get_samples_that_are for the
        # (1-based) class, scaled to percent.
        return [
            100 * (conf_mat[cls][cls] / sum(get_samples_that_are(conf_mat, cls + 1)))
            for cls in range(7)
        ]

    fig, (ax1, ax2) = plt.subplots(2)

    bar_width = 0.1
    bar_1_xs = np.arange(0.0, 0.3 * 6.5, 0.3)
    bar_2_xs = [x_i + bar_width for x_i in bar_1_xs]
    print(bar_1_xs, bar_2_xs)

    bar_1_ys = percent_correct(augmented_conf_mat)
    bar_2_ys = percent_correct(undersampled_conf_mat)

    # Ticks sit halfway between the two bars of each class.
    tick_positions = [x_i + 0.5 * bar_width for x_i in bar_1_xs]
    plt.xticks(tick_positions, [1, 2, 3, 4, 5, 6, "Outlier"])

    bar_series = (
        (bar_1_xs, bar_1_ys, "Augmented Dataset"),
        (bar_2_xs, bar_2_ys, "Undersampled Dataset"),
    )
    for series_idx, (series_xs, series_ys, series_label) in enumerate(bar_series):
        helpers.plot_a_bar(series_xs,
                           series_ys,
                           idx=series_idx,
                           label=series_label,
                           bar_width=bar_width,
                           axis_to_plot_on=ax2)
    helpers.xlabel("Class label")
    helpers.ylabel(r"Validation Accuracy (\%)", formatter=lambda x: x, ax=ax2)

    # Upper panel: class distribution of the undersampled dataset.
    segments = json.loads(
        path.Path("./segments-undersampled.json").read_text())
    class_counts = Counter(
        entry["data"]["segment_type"]["data"] for entry in segments.values())
    class_positions = list(range(1, 8))
    sample_total = sum(class_counts.values())
    class_shares = [
        count / sample_total for _, count in sorted(class_counts.items())
    ]
    helpers.plot_a_bar(class_positions,
                       class_shares,
                       idx=1,
                       axis_to_plot_on=ax1,
                       label_data=False)
    helpers.ylabel(r"$\mathbb{P}\{x = \mathcal{X}\}$",
                   formatter=lambda x: x,
                   ax=ax1)
    ax1.set_yticks([0.1, 0.2, 0.3])
    ax1.grid(**cfg.GRID)
    ax1.xaxis.set_ticklabels([])

    # Work on a copy so the shared legend configuration is not modified.
    legend_kwargs = deepcopy(cfg.LEGEND)
    legend_kwargs["bbox_to_anchor"] = (0.5, 0.975)
    fig.legend(**legend_kwargs, ncol=2)
    helpers.save_figure(figure_output_dir / "per-class-error-bar-plot.pdf",
                        no_legend=True)