Exemple #1
0
def write_client_data(output_dir, all_clients, all_services, client_data):
    """
    Graph client request durations and write per-client/service CSV summaries.

    Writes client_request_duration.png, client_request_duration_per_service.png,
    client_request_duration.csv and client-service-counts.csv into output_dir.

    Arguments:
        output_dir(Path): directory to write output files to
        all_clients(list): client node names
        all_services(list): service names
        client_data(DataFrame): request data with 'client', 'service',
            'timestamp', 'time_minutes' and 'latency_seconds' columns
    """
    fig, ax = map_utils.subplots()
    ax.set_title('Client request duration over time per service and client')
    ax.set_ylabel('duration (s)')
    ax.set_xlabel('time (minutes)')

    # service -> total request count across all clients; declared before the
    # with-block because it is used after the CSV file is closed
    successful_requests = dict()

    with open(output_dir / 'client_request_duration.csv', 'w') as f:
        f.write('client,service,requests,avg_duration\n')
        for node_dir in all_clients:
            for service in all_services:
                plot_data = client_data.loc[ (client_data['client'] == node_dir) & (client_data['service'] == service) ]

                label = "{0}-{1}".format(node_dir, service)
                ax.scatter(plot_data['time_minutes'], plot_data['latency_seconds'], label=label, s=1)

                # reuse the already-filtered frame instead of filtering client_data a second time
                request_count = plot_data.count()['timestamp']
                get_logger().info("Number of requests executed by %s for %s = %d", node_dir, service, request_count)
                successful_requests[service] = successful_requests.get(service, 0) + request_count

                avg_duration = plot_data['latency_seconds'].mean()
                count = plot_data['latency_seconds'].size
                f.write('{0},{1},{2},{3}\n'.format(node_dir, service, count, avg_duration))

    handles, labels = ax.get_legend_handles_labels()
    lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")

    output_name = output_dir / 'client_request_duration.png'
    fig.savefig(output_name.as_posix(), format='png', bbox_extra_artists=(lgd,), bbox_inches='tight')
    plt.close(fig)

    get_logger().info("Total client requests by service %s", successful_requests)

    # get latency per service across all clients
    fig, ax = map_utils.subplots()
    ax.set_title('Client request duration over time per service')
    ax.set_ylabel('duration (s)')
    ax.set_xlabel('time (minutes)')

    for service in all_services:
        plot_data = client_data.loc[ (client_data['service'] == service) ]

        label = "{0}".format(service)
        ax.scatter(plot_data['time_minutes'], plot_data['latency_seconds'], label=label, s=1)

    handles, labels = ax.get_legend_handles_labels()
    lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")

    output_name = output_dir / 'client_request_duration_per_service.png'
    fig.savefig(output_name.as_posix(), format='png', bbox_extra_artists=(lgd,), bbox_inches='tight')

    plt.close(fig)

    with open(output_dir / 'client-service-counts.csv', 'w') as f:
        f.write('service,requests\n')
        for (service, count) in successful_requests.items():
            f.write('{0},{1}\n'.format(service, count))
Exemple #2
0
def plot_log_line_occurrence(output_file, title, occurrence_times):
    """
    Outputs a plot of the occurrences of the log messages in time

    Arguments:
        output_file(str): file path to write the plot to
        title(str): title of the plot
        occurrence_times(list(float)): list of log line occurrence times in minutes
    """

    fig, ax = map_utils.subplots()
    ax.set_title(f'{title}')
    ax.set_ylabel('')
    ax.set_xlabel('time (minutes)')

    ax.errorbar(occurrence_times, [0] * len(occurrence_times),
                yerr=1,
                fmt='o',
                elinewidth=1,
                label="{0}".format(""))
    # max() raises ValueError on an empty sequence; only set the limit when
    # there is at least one occurrence
    if occurrence_times:
        ax.set_xlim(left=0, right=max(occurrence_times))

    fig.savefig(Path(output_file).as_posix(),
                format='png',
                bbox_extra_artists=(),
                bbox_inches='tight')
    # close the figure so repeated calls don't accumulate open figures
    # (every other plotting function in this file does this)
    plt.close(fig)
Exemple #3
0
def output_graph(output, data, min_time):
    """
    Graph resource report generation lag per node over time.

    Arguments:
        output(Path): directory to write the graph into
        data(dict): node name -> (timestamps, lag values in seconds)
        min_time: first timestamp of the run, subtracted to get relative minutes
    """
    fig, ax = map_utils.subplots()
    ax.set_title("Resource report generation lag")
    ax.set_xlabel("Time (minutes)")
    ax.set_ylabel("Difference (seconds)")

    max_minutes = 0
    for node_name, plot_data in data.items():
        (xs, ys) = plot_data
        minutes = [map_utils.timestamp_to_minutes(x - min_time) for x in xs]
        max_minutes = max(max_minutes, max(minutes))
        ax.scatter(minutes, ys, label=node_name, s=1)

    ax.set_xlim(left=0, right=max_minutes)
    handles, labels = ax.get_legend_handles_labels()
    lgd = ax.legend(handles,
                    labels,
                    bbox_to_anchor=(1.04, 1),
                    loc="upper left")

    output_name = output / 'resource-report-time-lag.png'
    # save via the figure object so an unrelated "current" pyplot figure
    # cannot be written out by mistake
    fig.savefig(output_name.as_posix(),
                format='png',
                bbox_extra_artists=(lgd, ),
                bbox_inches='tight')
    plt.close(fig)
Exemple #4
0
def write_memory_plot(node_name, file_memory, output, first_timestamp_sec):
    """
    Write a stacked plot of per-process memory load for one node.

    Arguments:
        node_name(str): name of the node
        file_memory(Path): CSV file with a 'timestamp' column plus one column per process
        output(Path): directory to write the graph into
        first_timestamp_sec(int): first timestamp, used to compute relative minutes
    """
    df = pd.read_csv(file_memory)
    # relative time in minutes since the start of the run
    df['time_minutes'] = (df['timestamp'] - first_timestamp_sec) / 60
    max_minutes = df['time_minutes'].max()

    fig, ax = map_utils.subplots()
    ax.set_title(f"{node_name} Process memory load")
    ax.set_ylabel('% memory')
    ax.set_xlabel('time (minutes)')
    ax.set_xlim(left=0, right=max_minutes)

    # every column except the time columns is a process series to stack
    skip = ('timestamp', 'time_minutes')
    processes = [col for col in sorted(df.columns, key=process_key) if col not in skip]
    series = [df[process] for process in processes]

    ax.stackplot(df['time_minutes'], series, labels=processes)

    handles, labels = ax.get_legend_handles_labels()
    lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")

    output_name = output / f"{file_memory.stem}.png"
    fig.savefig(output_name.as_posix(), format='png', bbox_extra_artists=(lgd,), bbox_inches='tight')
    plt.close(fig)
Exemple #5
0
def output_node_graph(output, node_name, data, label):
    """
    Plot cumulative success/failure request counts per service for one node
    (or for all nodes combined).

    Arguments:
        output (Path): base directory to write to
        node_name (str): node name, None to state this is all data across nodes
        data (dict): service -> relative minutes -> ServiceCounts
        label (str): Client or Server
    """

    fig, ax = map_utils.subplots()
    if node_name is None:
        ax.set_title(f"{label} Request Status")
    else:
        ax.set_title(f"{label} Request Status for {node_name}")
    ax.set_xlabel("Time (minutes)")
    ax.set_ylabel("Cumulative Count")

    for service, per_minute in sorted(data.items()):
        # order by relative minute, then split into parallel sequences
        minutes, counts = zip(*sorted(per_minute.items()))
        successes = [count.success_count for count in counts]
        failures = [count.failure_count for count in counts]

        ax.plot(minutes, successes, label=f"{service} success")
        ax.plot(minutes, failures, label=f"{service} failure")

    handles, labels = ax.get_legend_handles_labels()
    lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")

    if node_name is None:
        output_name = output / f"{label.lower()}_request_status.png"
    else:
        output_name = output / f"{label.lower()}_request_status-{node_name}.png"
    fig.savefig(output_name.as_posix(), format='png', bbox_extra_artists=(lgd,), bbox_inches='tight')
    plt.close(fig)
Exemple #6
0
def write_combined_per_service(output_dir, all_services, server_data, client_data):
    """
    For each service, graph client request durations overlaid on server
    processing durations and write one PNG per service.

    Arguments:
        output_dir(Path): directory to write the graphs into
        all_services: services to graph
        server_data(DataFrame): server latency data ('service', 'time_minutes', 'latency_seconds')
        client_data(DataFrame): client latency data ('service', 'time_minutes', 'latency_seconds')
    """

    for service in all_services:
        fig, ax = map_utils.subplots()
        ax.set_title('Client request and server processing duration over time for service {}'.format(service))
        ax.set_ylabel('duration (s)')
        ax.set_xlabel('time (minutes)')

        # server durations as a filled area, sorted by time for a clean outline
        server_points = server_data.loc[server_data['service'] == service].sort_values('time_minutes')
        ax.fill_between(server_points['time_minutes'], server_points['latency_seconds'],
                        label="server processing {0}".format(service),
                        interpolate=True, alpha=0.33)

        # client durations as a scatter drawn on top
        client_points = client_data.loc[client_data['service'] == service]
        ax.scatter(client_points['time_minutes'], client_points['latency_seconds'],
                   label="client request {0}".format(service), s=1)

        handles, labels = ax.get_legend_handles_labels()
        lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")

        output_name = output_dir / 'client_duration_with_server_{}.png'.format(service)
        fig.savefig(output_name.as_posix(), format='png', bbox_extra_artists=(lgd,), bbox_inches='tight')

        plt.close(fig)
Exemple #7
0
def graph(output, ncp, service_or_total, minutes, planned, actual_allocation, actual_load, actual_demand, show_load_demand_attr):
    """
    Plot the RLG region plan for one NCP and one service (or the total):
    planned vs. actual container allocation, optionally with the RLG load
    and demand series.

    Arguments:
        output(Path): directory to write the graph into
        ncp(str): name of the NCP
        service_or_total(str): service name or "total"
        minutes: x-axis values in minutes
        planned/actual_allocation/actual_load/actual_demand: y-series
        show_load_demand_attr(bool): when true, also plot load and demand
    """
    if len(minutes) < 1:
        get_logger().debug("Nothing to output for %s %s", ncp, service_or_total)
        return

    fig, ax = map_utils.subplots()
    ax.set_title(f"RLG region plan for {ncp} {service_or_total}")
    ax.set_xlabel("Time (minutes)")
    ax.set_ylabel("Containers")
    ax.set_xlim(left=0, right=max(minutes))

    # build the series list first, then plot them all the same way
    series = [("Planned Allocation", planned),
              ("Actual Allocation", actual_allocation)]
    if show_load_demand_attr:
        series.append(("RLG Load", actual_load))
        series.append(("RLG Demand", actual_demand))

    for series_label, values in series:
        ax.plot(minutes, values, label=series_label)

    handles, labels = ax.get_legend_handles_labels()
    lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")

    output_name = output / f"rlg-plan_{ncp}_{service_or_total}.png"
    fig.savefig(output_name.as_posix(), format='png', bbox_extra_artists=(lgd,), bbox_inches='tight')
    plt.close(fig)
def graph_request_result(output, all_statuses, service_data, client_region, service):
    """
    Graph the percentage of requests in each status per time bin as a
    stacked bar chart.

    Arguments:
        output(Path): directory for graphs
        all_statuses(list): all status messages seen in the dataset
        service_data(DataFrame): request data for this client region and
            service, with 'bin', 'message' and 'relative minutes' columns
        client_region(str): the client region to generate the graph for
        service(str): the service to generate the graph for
    """
    results = service_data.groupby(['bin', 'message'])['relative minutes'].agg('count').reset_index()

    # status -> values to plot
    series = dict()
    bin_labels = list()
    bin_positions = list()
    # 'time_bin' rather than 'bin' to avoid shadowing the builtin
    for time_bin in sorted(results['bin'].unique()):
        # NOTE(review): label looks like a half-open interval missing its
        # opening bracket — confirm intended format before changing it
        bin_labels.append(f'{time_bin.left} - {time_bin.right})')
        bin_positions.append(time_bin.left)

        bin_results = results[results['bin'] == time_bin]

        # compute total value
        total = bin_results['relative minutes'].sum()

        get_logger().debug("Client region: %s service: %s bin: %s Total: %s", client_region, service, time_bin, total)

        # compute percentages
        for status in sorted(all_statuses):
            status_results = bin_results[bin_results['message'] == status]['relative minutes']
            if len(status_results) > 0:
                value = float(status_results.iloc[0]) / total * 100
            else:
                value = 0
            series.setdefault(status, list()).append(value)

    # track the bottom values for the bar charts
    bottom = [0] * len(bin_labels)

    fig, ax = map_utils.subplots()
    # previous title read 'Status of for ...' — garbled; fixed wording
    ax.set_title(f'Status of requests for {service} from {client_region}')
    ax.set_xlabel('Minute Window')
    ax.set_ylabel('% requests')
    ax.set_xticks(bin_positions, bin_labels)

    for status in sorted(all_statuses):
        data = series.get(status, [0] * len(bin_labels))
        ax.bar(bin_positions, data, bottom=bottom, label=status)

        # move the bottom up for the next series
        bottom = np.add(bottom, data)

    handles, labels = ax.get_legend_handles_labels()
    lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")

    output_name = output / f"client_request_status_bar-{client_region}-{service}.png"
    fig.savefig(output_name.as_posix(), format='png', bbox_extra_artists=(lgd,), bbox_inches='tight')
    plt.close(fig)
Exemple #9
0
def output_requests_graph_per_region(requests_region_service_counts,
                                     first_timestamp, output):
    """
    Graph DNS cache percentages per service for each region.

    Arguments:
        requests_region_service_counts(dict): region --> service --> window --> ClientDnsRequestCounts
        first_timestamp(datetime): timestamp for the beginning of the run
        output(Path): output folder for the graph files
    """

    for region, region_data in requests_region_service_counts.items():
        services = sorted(region_data.keys())
        # lazy %-formatting so the argument is only rendered when debug is on
        get_logger().debug("services = %s", services)

        graph_file = output / ('client_dns_lookup_percent-' + region + '.png')
        get_logger().info("Outputting graph file: %s", graph_file.name)

        fig, ax = map_utils.subplots()
        ax.set_title(f"DNS cache percentages for services in region {region}")
        ax.set_xlabel("Time (minutes)")
        ax.set_ylim([0, 100])

        for service, service_data in region_data.items():
            times = list()
            cache_percentages = list()

            # iterate the windows in chronological order directly; no need
            # to copy the keys into a set first
            for window in sorted(service_data):
                time = (window -
                        first_timestamp) / datetime.timedelta(minutes=1)
                # cache percentage is the complement of the DNS request percentage
                cache_percentage = (
                    1 -
                    service_data[window].get_dns_requests_percentage()) * 100

                times.append(time)
                cache_percentages.append(cache_percentage)

            ax.plot(times, cache_percentages, label=f"{service}")

        handles, labels = ax.get_legend_handles_labels()
        lgd = ax.legend(handles,
                        labels,
                        bbox_to_anchor=(1.04, 1),
                        loc="upper left")
        fig.savefig(graph_file.as_posix(),
                    format='png',
                    bbox_extra_artists=(lgd, ),
                    bbox_inches='tight')
        plt.close(fig)
Exemple #10
0
def process_file(first_timestamp_ms, output, csv_file):
    """
    Graph inferred demand from one NCP's CSV file, producing one stacked
    plot per (service, attribute) with one series per source region.

    Arguments:
        first_timestamp_ms(int): start of the run in milliseconds
        output(Path): directory to write graphs into
        csv_file(Path): CSV with 'timestamp', 'service', 'source region',
            'attribute' and 'value' columns; the file stem names the NCP
    """
    ncp = csv_file.stem

    get_logger().debug("Processing %s ncp: %s", csv_file, ncp)

    df = pd.read_csv(csv_file)
    df['minutes'] = (df['timestamp'] - first_timestamp_ms) / 1000 / 60
    df = df.sort_values(by=['minutes'])

    max_minutes = df['minutes'].max()
    services = df['service'].unique()
    source_regions = df['source region'].unique()
    attributes = df['attribute'].unique()

    xdata = df['minutes'].unique()
    for service in sorted(services):
        get_logger().debug("Service: %s", service)
        for attr in attributes:
            get_logger().debug("attribute: %s", attr)

            fig, ax = map_utils.subplots()
            ax.set_title(f"Inferred demand for {service} and {attr} on {ncp}")
            ax.set_xlabel('time (minutes)')
            ax.set_xlim(left=0, right=max_minutes)

            stack_ys = list()
            stack_labels = list()
            for source_region in sorted(source_regions):
                get_logger().debug("source region: %s", source_region)

                region_rows = df.loc[(df['service'] == service)
                                     & (df['source region'] == source_region)
                                     & (df['attribute'] == attr)]
                # map minutes -> value, then fill gaps so every series
                # shares the common x axis
                by_time = pd.Series(region_rows['value'].values,
                                    index=region_rows['minutes']).to_dict()
                yfilled = map_utils.fill_missing_times(xdata, by_time)

                get_logger().debug("yseries len: %d", len(yfilled))
                stack_ys.append(yfilled)
                stack_labels.append(source_region)

            get_logger().debug("xdata len: %d", len(xdata))
            get_logger().debug("ydata len: %d", len(stack_ys))
            ax.stackplot(xdata, stack_ys, labels=stack_labels)
            handles, labels = ax.get_legend_handles_labels()
            lgd = ax.legend(handles,
                            labels,
                            bbox_to_anchor=(1.04, 1),
                            loc="upper left")

            output_name = output / f"{ncp}-{service}-{attr}.png"
            fig.savefig(output_name.as_posix(),
                        format='png',
                        bbox_extra_artists=(lgd, ),
                        bbox_inches='tight')
            plt.close(fig)
Exemple #11
0
def process_container_per_service(output_dir, data, container):
    """
    Graph network traffic (RX/TX scatter plus rolling-average trend) over
    time for each service running in a container.

    Arguments:
        output_dir(Path): base directory for output
        data(DataFrame): data to process
        container(str): container to process
    """

    max_minutes = math.ceil(data['time_minutes'].max())

    try:
        name = container.replace(".map.dcomp", "")

        container_data = data.loc[data['container'] == container]
        services = container_data['service'].unique()
        for service in services:
            fig, ax = map_utils.subplots()
            ax.set_title('network traffic for {0} - {1}'.format(name, service))
            ax.set_ylabel('bandwidth (Mbps)')
            ax.set_xlabel('time (minutes)')

            # filter on container_data (not data) so mask and frame agree
            plot_data = container_data.loc[container_data['service'] == service].copy()

            # compute the rolling averages once, rather than redundantly
            # recomputing both inside the direction loop
            plot_data['RX_rolling_average'] = plot_data['RX'].rolling(
                window=rolling_average_window_size).mean()
            plot_data['TX_rolling_average'] = plot_data['TX'].rolling(
                window=rolling_average_window_size).mean()

            for direction in ('RX', 'TX'):
                label = '{0}-{1}'.format(service, direction)
                ax.scatter(plot_data['time_minutes'],
                           plot_data[direction],
                           label=label,
                           s=2)

                direction_avg = '{0}_rolling_average'.format(direction)
                x = plot_data['time_minutes']
                y = plot_data[direction_avg]
                ax.plot(x, y, label='{0} trend'.format(direction))

            ax.set_xlim(left=0, right=max_minutes)

            handles, labels = ax.get_legend_handles_labels()
            lgd = ax.legend(handles, labels, bbox_to_anchor=(1, 1))

            output_name = output_dir / '{0}-network-{1}.png'.format(
                name, service)
            # save via the figure object, not pyplot's current-figure state
            fig.savefig(output_name.as_posix(),
                        format='png',
                        bbox_extra_artists=(lgd, ),
                        bbox_inches='tight')
            plt.close(fig)
    except Exception:
        # narrowed from a bare except: so KeyboardInterrupt/SystemExit propagate
        get_logger().exception("Unexpected error")
Exemple #12
0
def graph_num_clients(step_size, output_name, title, data):
    """
    Create a stacked graph of the number of active clients per client pool.

    Arguments:
        step_size(int): number of milliseconds between samples
        output_name(Path): where to write the graph
        title(str): title for the graph
        data(DataFrame): data to graph with 'start', 'end', 'client'
            and 'num clients' columns
    """

    max_time = data['end'].max()
    clients = sorted(data['client'].unique())
    xs = list()
    ydata = {client: list() for client in clients}  # client -> series

    time_step = 0
    while time_step < max_time:
        xs.append(map_utils.timestamp_to_minutes(time_step))

        for client in clients:
            # sum the clients whose [start, end) window contains this sample
            active = data.loc[(data['start'] <= time_step)
                              & (time_step < data['end'])
                              & (data['client'] == client)]
            ydata[client].append(active['num clients'].sum())

        time_step += step_size

    fig, ax = map_utils.subplots()
    ax.set_title(title)
    ax.set_xlabel("Time (minutes)")
    ax.set_ylabel("Number of clients")

    ax.set_xlim(left=0, right=map_utils.timestamp_to_minutes(max_time))
    stack_labels, stack_ys = zip(*(sorted(ydata.items())))
    ax.stackplot(xs, stack_ys, labels=stack_labels)

    handles, labels = ax.get_legend_handles_labels()
    lgd = ax.legend(handles,
                    labels,
                    bbox_to_anchor=(1.04, 1),
                    loc="upper left")

    fig.savefig(output_name.as_posix(),
                format='png',
                bbox_extra_artists=(lgd, ),
                bbox_inches='tight')
    plt.close(fig)
Exemple #13
0
def write_memory_absolute_plot(node_name, data_memory, all_pids, pid_names, output, first_timestamp_sec):
    """
    Write out the graph of absolute memory use per process as a stacked plot.

    Arguments:
        node_name(str): name of the node
        data_memory(dict): ts(int) -> pid(int) -> memory(float)
        all_pids(set): all process ids
        pid_names(dict): pid(int) -> pid name(str)
        output(Path): where the graphs are being written
        first_timestamp_sec(int): first timestamp, used for relative minutes
    """

    timestamps = sorted(data_memory.keys())
    minutes = [(ts - first_timestamp_sec) / 60 for ts in timestamps]
    max_minutes = max(minutes)

    fig, ax = map_utils.subplots()
    ax.set_title(f"{node_name} Process memory")
    ax.set_ylabel('GB memory')
    ax.set_xlabel('time (minutes)')
    ax.set_xlim(left=0, right=max_minutes)

    stack_ys = list()
    stack_labels = list()
    for pid in sorted(all_pids, key=make_pid_key(pid_names)):
        # a pid missing at some timestamp contributes zero at that point
        series = [
            bytes_to_gigabytes(data_memory[ts][pid]) if pid in data_memory[ts] else 0
            for ts in timestamps
        ]
        stack_ys.append(series)
        stack_labels.append(pid_label(pid, pid_names))

    ax.stackplot(minutes, stack_ys, labels=stack_labels)
    handles, labels = ax.get_legend_handles_labels()
    lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")

    output_name = output / f"{node_name}_stats_memory_absolute.png"
    fig.savefig(output_name.as_posix(), format='png', bbox_extra_artists=(lgd,), bbox_inches='tight')
    plt.close(fig)
Exemple #14
0
def write_network_bandwidth_plot(output_dir, first_timestamp_sec, node_name, graph_label, file_label, bandwidth_data, ip_info):
    """
    Write a plot of per-interface network bandwidth over time for one node.

    Arguments:
        output_dir(Path): base directory for output
        first_timestamp_sec(int): the first timestamp for relative computation
        node_name(str): name of the node the data belongs to
        graph_label(str): title for the graph
        file_label(str): label used in the output file name
        bandwidth_data(dict): interface -> ts -> value (assumes value is in
            bytes per second, given the Mbps conversion below -- TODO confirm)
        ip_info(dict): node -> ifce -> ip
    """
    node_ip_info = ip_info.get(node_name, dict())
    get_logger().debug("Got ip info for %s -> %s", node_name, node_ip_info)

    fig, ax = map_utils.subplots()
    ax.set_title(graph_label)
    ax.set_ylabel('mbps')
    ax.set_xlabel('time (minutes)')

    max_minutes=0
    for interface, interface_data in bandwidth_data.items():
        xdata = list()
        ydata = list()
        for ts, value in interface_data.items():
            rel_sec = ts - first_timestamp_sec
            rel_min = rel_sec / 60
            mbps = value / 1024 / 1024 * 8 # bytes -> megabytes (/1024/1024), then *8 bits per byte -> megabits
            xdata.append(rel_min)
            ydata.append(mbps)
            max_minutes = max(max_minutes, rel_min)

        # append the interface's IP to the legend label when it is known
        if interface in node_ip_info:
            ip = f" - {node_ip_info[interface]}"
        else:
            ip = ""
        ax.plot(xdata, ydata, label=f"{interface}{ip}")

    ax.set_xlim(left=0, right=max_minutes)
        
    handles, labels = ax.get_legend_handles_labels()
    lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")

    output_name = output_dir / f"{node_name}_stats_network-{file_label}.png"
    fig.savefig(output_name.as_posix(), format='png', bbox_extra_artists=(lgd,), bbox_inches='tight')
    plt.close(fig)
Exemple #15
0
def write_server_data(output_dir, all_services, server_data):
    """
    Scatter-plot server processing duration over time, one series per service.

    Arguments:
        output_dir(Path): directory to write the graph into
        all_services: services to plot
        server_data(DataFrame): data with 'service', 'time_minutes'
            and 'latency_seconds' columns
    """
    fig, ax = map_utils.subplots()
    ax.set_title('Server processing duration over time per service')
    ax.set_ylabel('duration (s)')
    ax.set_xlabel('time (minutes)')

    for service in all_services:
        service_rows = server_data.loc[server_data['service'] == service]
        ax.scatter(service_rows['time_minutes'],
                   service_rows['latency_seconds'],
                   label="{0}".format(service),
                   s=1)

    handles, labels = ax.get_legend_handles_labels()
    lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")

    output_name = output_dir / 'server_processing_duration_per_service.png'
    fig.savefig(output_name.as_posix(), format='png', bbox_extra_artists=(lgd,), bbox_inches='tight')

    plt.close(fig)
Exemple #16
0
def output_graphs(first_timestamp_ms, output, data, load_name):
    """
    Graph container load per service and attribute as stacked plots.

    Arguments:
        first_timestamp_ms(int): start of the run in milliseconds
        output(Path): directory to write graphs into
        data(dict): service -> attribute -> container -> timestamp -> value
        load_name(str): name of the load metric, used in titles and filenames
    """
    all_times = find_all_times(data)
    all_times_minutes = [(float(t) - first_timestamp_ms) / 1000.0 / 60.0
                         for t in all_times]
    max_minutes = max(all_times_minutes)

    for app, app_values in data.items():
        for attr, attr_values in app_values.items():
            fig, ax = map_utils.subplots()
            ax.set_title(f"{load_name} for service: {app} attribute: {attr}")
            ax.set_xlabel('time (minutes)')
            ax.set_xlim(left=0, right=max_minutes)

            plot_data = dict()
            for container_name, time_values in attr_values.items():
                time_values_minutes = dict()
                for timestamp, value in time_values.items():
                    time = (float(timestamp) -
                            first_timestamp_ms) / 1000.0 / 60.0
                    get_logger().debug("time %s timestamp %s first_time %s",
                                       time, timestamp, first_timestamp_ms)
                    time_values_minutes[time] = value

                yinterp = map_utils.fill_missing_times(all_times_minutes,
                                                       time_values_minutes)

                plot_data[container_name] = yinterp

            series_labels, ydata = zip(*sorted(plot_data.items()))
            # draw on this figure's axes explicitly instead of relying on
            # pyplot's notion of the "current" figure
            ax.stackplot(all_times_minutes, ydata, labels=series_labels)

            handles, labels = ax.get_legend_handles_labels()
            lgd = ax.legend(handles,
                            labels,
                            bbox_to_anchor=(1.04, 1),
                            loc="upper left")

            output_name = output / f"container-{load_name}-{app}-{attr}.png"
            fig.savefig(output_name.as_posix(),
                        format='png',
                        bbox_extra_artists=(lgd, ),
                        bbox_inches='tight')
            plt.close(fig)
Exemple #17
0
def write_per_client(output_dir, all_services, all_clients, client_data):
    """
    For each client, scatter-plot request duration over time per service
    and write one PNG per client.

    Arguments:
        output_dir(Path): directory to write the graphs into
        all_services: services to plot
        all_clients: clients to generate graphs for
        client_data(DataFrame): data with 'client', 'service',
            'time_minutes' and 'latency_seconds' columns
    """
    for client in all_clients:
        fig, ax = map_utils.subplots()
        ax.set_title('{0} request duration over time per service'.format(client))
        ax.set_ylabel('duration (s)')
        ax.set_xlabel('time (minutes)')

        # filter by client once, then by service inside the loop
        client_rows = client_data.loc[client_data['client'] == client]
        for service in all_services:
            service_rows = client_rows.loc[client_rows['service'] == service]
            ax.scatter(service_rows['time_minutes'],
                       service_rows['latency_seconds'],
                       label="{0}".format(service),
                       s=1)

        handles, labels = ax.get_legend_handles_labels()
        lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")

        output_name = output_dir / '{0}_request_duration.png'.format(client)
        fig.savefig(output_name.as_posix(), format='png', bbox_extra_artists=(lgd,), bbox_inches='tight')

        plt.close(fig)
Exemple #18
0
def plot_dns_container_resolution_counts(output_dir, dns_resolution_counts,
                                         region, service):
    """
    Graph DNS resolution counts per container for a service in a region.
    (Docstring previously described overflow plans -- copy/paste error.)

    Arguments:
        output_dir(Path): directory to write the results in
        dns_resolution_counts(DataFrame): dns resolution data for region and service
        region(str): region to generate graph for
        service(str): service to generate graph for
    """

    # container columns: drop all-zero columns, keep names matching the
    # container pattern (contains "_c")
    containers = list(
        dns_resolution_counts.loc[:, (dns_resolution_counts != 0).any(
            axis=0)].filter(regex='.+_c.+', axis='columns').columns)
    plot_data = dns_resolution_counts

    fig, ax = map_utils.subplots()
    ax.set_title(
        f'DNS resolution counts for containers of service {service} in region {region}'
    )
    ax.set_ylabel('Resolutions')
    ax.set_xlabel('time (minutes)')

    # containers is already a list; no redundant list() wrapper needed
    for container in sorted(containers):
        ax.plot(plot_data['time_minutes'],
                plot_data[container],
                label="{0}".format(container))

    handles, labels = ax.get_legend_handles_labels()
    lgd = ax.legend(handles,
                    labels,
                    bbox_to_anchor=(1.04, 1),
                    loc="upper left")

    output_name = output_dir / f"dns-container-resolution-counts-{region}-{service}.png"
    fig.savefig(output_name.as_posix(),
                format='png',
                bbox_extra_artists=(lgd, ),
                bbox_inches='tight')
    plt.close(fig)
Exemple #19
0
def plot_dns_region_resolution_counts(output_dir, dns_resolution_counts,
                                      all_regions, from_region, service):
    """
    Graph DNS resolution counts per destination region for a service as seen
    from one region. (Docstring previously described overflow plans --
    copy/paste error.)

    Arguments:
        output_dir(Path): directory to write the results in
        dns_resolution_counts(DataFrame): dns resolution data for from_region and service
        all_regions(set): all regions
        from_region(str): region to generate graph for
        service(str): service to generate graph for
    """
    plot_data = dns_resolution_counts

    fig, ax = map_utils.subplots()
    ax.set_title(
        f'DNS region resolution counts for service {service} in region {from_region}'
    )
    ax.set_ylabel('Resolutions')
    ax.set_xlabel('time (minutes)')

    # sorted() accepts any iterable; wrapping the set in list() first is redundant
    for overflow_region in sorted(all_regions):
        ax.plot(plot_data['time_minutes'],
                plot_data[overflow_region],
                label="{0}".format(overflow_region))

    handles, labels = ax.get_legend_handles_labels()
    lgd = ax.legend(handles,
                    labels,
                    bbox_to_anchor=(1.04, 1),
                    loc="upper left")

    output_name = output_dir / f"dns-region-resolution-counts-{from_region}-{service}.png"
    fig.savefig(output_name.as_posix(),
                format='png',
                bbox_extra_artists=(lgd, ),
                bbox_inches='tight')
    plt.close(fig)
def output_graph(output, ncp, xs, ys, first_timestamp_ms):
    """
    Graph RLG resource report lag between an NCP and each of its neighbors.

    Arguments:
        output(Path): output directory
        ncp(str): name of the NCP the graph is for
        xs: list of x-values (timestamps in milliseconds)
        ys(dict): neighbor to y-values
        first_timestamp_ms(int): initial timestamp to subtract from values
    """
    fig, ax = map_utils.subplots()
    ax.set_title(f"RLG Resource report lag for node {ncp}")
    ax.set_xlabel("Time (minutes)")
    ax.set_ylabel("Difference (seconds)")

    minutes = [
        map_utils.timestamp_to_minutes(x - first_timestamp_ms) for x in xs
    ]
    max_minutes = max(minutes)
    for node_name, plot_data in ys.items():
        get_logger().debug(
            "Graphing NCP %s with neighbor %s. x.len: %d y.len: %d", ncp,
            node_name, len(minutes), len(plot_data))
        ax.scatter(minutes, plot_data, label=node_name, s=1)

    ax.set_xlim(left=0, right=max_minutes)
    handles, labels = ax.get_legend_handles_labels()
    lgd = ax.legend(handles,
                    labels,
                    bbox_to_anchor=(1.04, 1),
                    loc="upper left")

    output_name = output / f'rlg-node-resource-report-time-lag_{ncp}.png'
    # save via the figure object rather than pyplot's current-figure state
    fig.savefig(output_name.as_posix(),
                format='png',
                bbox_extra_artists=(lgd, ),
                bbox_inches='tight')
    plt.close(fig)
Exemple #21
0
def main(argv=None):
    """
    Read the per-service client demand CSV files produced by
    MAPChartGeneration and graph the number of clients per service
    over time.

    Arguments:
        argv(list): command line arguments (defaults to sys.argv[1:])
    Returns:
        int or None: non-zero on error
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument("-l",
                        "--logconfig",
                        dest="logconfig",
                        help="logging configuration (default: logging.json)",
                        default='logging.json')
    parser.add_argument("-c",
                        "--chart-output",
                        dest="chart_output",
                        help="Output of MAPChartGeneration(Required)",
                        required=True)
    parser.add_argument("-o",
                        "--output",
                        dest="output",
                        help="Output directory (Required)",
                        required=True)
    parser.add_argument("--interactive",
                        dest="interactive",
                        action="store_true",
                        help="If specified, display the plots")

    args = parser.parse_args(argv)

    map_utils.setup_logging(default_path=args.logconfig)

    chart_output = Path(args.chart_output)
    if not chart_output.exists():
        get_logger().error("%s does not exist", chart_output)
        return 1

    output_dir = Path(args.output)
    output_dir.mkdir(parents=True, exist_ok=True)

    client_demand_dir = chart_output / 'client_demand'

    frames = list()
    for file in client_demand_dir.glob('num_clients-*.csv'):
        match = re.match(r'^num_clients-(.*)\.csv$', file.name)
        if not match:
            continue
        service = match.group(1)
        df = pd.read_csv(file)
        df['service'] = service
        frames.append(df)

    # pd.concat raises ValueError on an empty list; fail with a clear
    # message instead of a traceback
    if not frames:
        get_logger().error("No num_clients-*.csv files found in %s",
                           client_demand_dir)
        return 1

    data = pd.concat(frames, ignore_index=True)
    # convert ms offsets from the first sample into minutes
    data['time_minutes'] = (data['time'] - data['time'].min()) / 1000 / 60

    all_services = data['service'].unique()

    fig, ax = map_utils.subplots()
    ax.set_title('number of clients per service over time')
    ax.set_ylabel('number of clients')
    ax.set_xlabel('time (minutes)')

    for service in all_services:
        plot_data = data.loc[(data['service'] == service)]
        ax.step(plot_data['time_minutes'],
                plot_data['num clients'],
                where='post',
                label=service)

    handles, labels = ax.get_legend_handles_labels()
    lgd = ax.legend(handles,
                    labels,
                    bbox_to_anchor=(1.04, 1),
                    loc="upper left")

    if args.interactive:
        plt.show()
    else:
        output_name = output_dir / 'client_demand.png'
        # save through the figure object rather than pyplot's current figure
        fig.savefig(output_name.as_posix(),
                    format='png',
                    bbox_extra_artists=(lgd, ),
                    bbox_inches='tight')

    plt.close(fig)
def process_service(filename, service, host_ip, subnet_to_region, hopcount, output):
    """
    Process the client request data for one service and write hop count
    pie charts per client and per client region.

    Arguments:
        filename(Path): the file to read the client request information from
        service(str): service name
        host_ip(DataFrame): host and IP address information
        subnet_to_region(DataFrame): mapping of subnets to regions
        hopcount(DataFrame): regional hop count information
        output(Path): directory to output to
    """

    df = pd.read_csv(filename)
    if len(df.index) < 1:
        get_logger().warning("No data found in %s", filename)
        return

    df['client lower'] = df['client'].str.lower()
    df['server lower'] = df['server'].str.lower()
    df = df.drop(columns=['event', 'time_sent', 'time_ack_received', 'latency', 'hop_count'], axis=1)

    # convert to NA after doing lower call so that we don't end up trying to lowercase a NaN server (unknown host)
    # np.nan instead of np.NaN: the NaN alias was removed in NumPy 2.0
    df = df.replace('?', np.nan)

    # remove unknown hosts
    df = df[~df['server'].isna()]

    # get client IP addresses
    df = df.merge(host_ip, how='left', left_on=['client lower'], right_on=['host lower']).drop(['host', 'host lower'], axis=1)
    df = df.rename(columns={'ip': 'client ip'})

    # get server IP addresses
    df = df.merge(host_ip, how='left', left_on=['server lower'], right_on=['host lower']).drop(['host', 'host lower'], axis=1)
    df = df.rename(columns={'ip': 'server ip'})

    # get region information
    df['client region'] = df['client ip'].apply(lambda x: ip_to_region(x, subnet_to_region))
    df['server region'] = df['server ip'].apply(lambda x: ip_to_region(x, subnet_to_region))

    # get hopcounts
    df = df.merge(hopcount, how='left', left_on=['client region', 'server region'], right_on=['from', 'to'])

    # the success/failure partitions don't change per iteration, so filter once
    successes = df[df.success]
    failures = df[~df.success]

    for client in df['client'].unique():
        # plot successes
        counts = successes[successes['client'] == client].groupby(by='hop count').agg('count')['timestamp']
        _plot_hopcount_pie(
            output / f'hopcount-{client}-{service}-successes.png',
            f'hop counts for {client} {service} - {counts.sum()} successful requests',
            counts, use_palette=True)

        # plot failures
        counts = failures[failures['client'] == client].groupby(by='hop count').agg('count')['timestamp']
        _plot_hopcount_pie(
            output / f'hopcount-{client}-{service}-failures.png',
            f'hop counts for {client} {service} - {counts.sum()} failed requests',
            counts, use_palette=False)

    for client_region in df['client region'].unique():
        # plot successes
        counts = successes[successes['client region'] == client_region].groupby(by='hop count').agg('count')['timestamp']
        _plot_hopcount_pie(
            output / f'hopcount-region_{client_region}-{service}-successes.png',
            f'hop counts for client region {client_region} {service} - {counts.sum()} successful requests',
            counts, use_palette=True)

        # plot failures
        counts = failures[failures['client region'] == client_region].groupby(by='hop count').agg('count')['timestamp']
        _plot_hopcount_pie(
            output / f'hopcount-region_{client_region}-{service}-failures.png',
            f'hop counts for client region {client_region} {service} - {counts.sum()} failed requests',
            counts, use_palette=False)


def _plot_hopcount_pie(output_name, title, counts, use_palette):
    """
    Write a single pie chart of request counts grouped by hop count.

    Arguments:
        output_name(Path): path of the PNG file to write
        title(str): chart title
        counts(Series): request count per hop count value
        use_palette(bool): if True, color each wedge by its hop count using
            the shared plot color palette (used for the success charts)
    """
    fig, ax = map_utils.subplots()
    ax.set_title(title)

    if use_palette:
        color_palette = map_utils.get_plot_colors()
        labels = [int(idx) for idx in counts.index]
        colors = [color_palette[int(idx)] for idx in counts.index]
        ax.pie(counts, labels=labels, colors=colors, autopct='%1.1f%%')
    else:
        ax.pie(counts, labels=counts.index, autopct='%1.1f%%')
    ax.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.

    fig.savefig(output_name.as_posix(), format='png', bbox_inches='tight')
    plt.close(fig)
# Exemple #23
# 0
def graph(output, first_timestamp, label, data_type, node_data):
    """
    Graph network data.

    Writes one plot and one CSV per node and traffic direction
    (RX, TX, ALL) that has data.

    Arguments:
        output(Path): output directory
        first_timestamp(int): first timestamp seen in the data
        label(str): "Summary" or "Report"
        data_type(str): "demand" or "load"
        node_data(dict): node name -> NetworkData
    """

    for node_name, network_data in node_data.items():
        # direction -> list of (series label, times in minutes, values)
        collected = {'RX': list(), 'TX': list(), 'ALL': list()}
        max_minutes = 0

        for service, service_data in network_data.service.items():
            for direction, samples in (('RX', service_data.rx),
                                       ('TX', service_data.tx),
                                       ('ALL', service_data.all_traffic)):
                if len(samples) < 1:
                    continue
                pairs = sorted(samples.items())
                timestamps, values = zip(*pairs)
                times = [
                    map_utils.timestamp_to_minutes(float(t) - first_timestamp)
                    for t in timestamps
                ]
                collected[direction].append(
                    (f"{service} {direction}", times, values))
                # the x-limit is shared by all three plots of this node
                max_minutes = max(max_minutes, max(times))

        for direction, series_list in collected.items():
            if series_list:
                _graph_node_direction(output, node_name, label, data_type,
                                      direction, series_list, max_minutes)


def _graph_node_direction(output, node_name, label, data_type, direction,
                          series_list, max_minutes):
    """
    Write the plot and CSV for one traffic direction of one node.

    Arguments:
        output(Path): output directory
        node_name(str): node the data belongs to
        label(str): "Summary" or "Report"
        data_type(str): "demand" or "load"
        direction(str): "RX", "TX" or "ALL"
        series_list(list): (series label, times, values) tuples, one per service
        max_minutes(float): right x-limit shared by all plots of this node
    """
    fig, ax = map_utils.subplots()
    ax.set_title(f"{node_name} {label} {data_type} {direction} data")
    ax.set_xlabel("Time (minutes)")

    frames = list()
    for series_label, times, values in series_list:
        ax.plot(times, values, label=series_label)
        frames.append(
            pd.DataFrame(list(values), index=times, columns=[series_label]))

    ax.set_xlim(left=0, right=max_minutes)
    handles, labels = ax.get_legend_handles_labels()
    lgd = ax.legend(handles,
                    labels,
                    bbox_to_anchor=(1.04, 1),
                    loc="upper left")

    output_name = output / f"network-{node_name}-{label}-{direction}-{data_type}.png"
    fig.savefig(output_name.as_posix(),
                format='png',
                bbox_extra_artists=(lgd, ),
                bbox_inches='tight')
    plt.close(fig)

    # write out CSV of the plot data
    df = pd.concat(frames, axis=1)
    df.to_csv(output / f"network-{node_name}-{label}-{direction}-{data_type}.csv",
              index_label="relative minutes")
def plot_region_service_plan_type(output_dir, plan_type, plans, all_regions,
                                  region, service):
    """
    Graph individual overflow plans on a graph.

    Writes a step plot and a stacked bar chart of the percentage of
    traffic dispatched to each region.

    Arguments:
        output_dir(Path): directory to write the results in
        plan_type(str): RLG, DCOP, DNS
        plans(DataFrame): plan update information
        all_regions(set): all regions
        region(str): region to generate graph for
        service(str): service to generate graph for
    """
    plot_data = plans.loc[(plans['plan_region'] == region)
                          & (plans['service'] == service)]
    max_x = plot_data['time_minutes'].max()
    if not math.isfinite(max_x):
        # max() over an empty selection yields NaN
        get_logger().warning(
            "Non-finite max_x found %s graphing %s for %s and %s", max_x,
            plan_type, region, service)
        return

    get_logger().info("graphing %s for %s and %s with max_x %s", plan_type,
                      region, service, max_x)

    fig, ax = map_utils.subplots()
    ax.set_title(
        f'{plan_type} overflow plans for service {service} in region {region}')
    ax.set_ylabel('% dispatch to region')
    ax.set_xlabel('time (minutes)')
    ax.set_xlim(left=0, right=max_x)

    fig_stack, ax_stack = map_utils.subplots()
    ax_stack.set_title(
        f'{plan_type} overflow plans for service {service} in region {region}')
    ax_stack.set_ylabel('% dispatch to region')
    ax_stack.set_xlabel('time (minutes)')
    ax_stack.set_xlim(left=0, right=max_x)

    times = plot_data['time_minutes']

    # track the bottom values for the bar charts
    bottom = [0] * len(times)

    # sorted() gives a deterministic series/legend order (sets iterate arbitrarily)
    for overflow_region in sorted(all_regions):
        data = plot_data[overflow_region]
        ax.step(times, data, label=overflow_region, where='post')

        ax_stack.bar(times,
                     data,
                     bottom=bottom,
                     label=overflow_region,
                     width=1)

        # move the bottom up for the next series
        bottom = np.add(bottom, data)

    handles, labels = ax.get_legend_handles_labels()
    lgd = ax.legend(handles,
                    labels,
                    bbox_to_anchor=(1.04, 1),
                    loc="upper left")

    output_name = output_dir / f"overflow_analysis-{plan_type}-{region}-{service}.png"
    fig.savefig(output_name.as_posix(),
                format='png',
                bbox_extra_artists=(lgd, ),
                bbox_inches='tight')
    plt.close(fig)

    handles_stack, labels_stack = ax_stack.get_legend_handles_labels()
    lgd_stack = ax_stack.legend(handles_stack,
                                labels_stack,
                                bbox_to_anchor=(1.04, 1),
                                loc="upper left")

    output_name_stack = output_dir / f'overflow_analysis-{plan_type}-{region}-{service}_stacked.png'
    fig_stack.savefig(output_name_stack.as_posix(),
                      format='png',
                      bbox_extra_artists=(lgd_stack, ),
                      bbox_inches='tight')
    plt.close(fig_stack)
def plot_expected(output_dir, plan_label, source_region, service, times,
                  region_weights):
    """
    Graph the expected traffic percentages per region as a step plot and
    a stacked bar chart.

    Arguments:
        output_dir(Path): base output directory
        plan_label(str): type of plan (RLG, DCOP, DNS)
        source_region(str): name of region that the traffic starts at
        service(str): the service to consider
        times(list): from compute_expected
        region_weights(dict): from compute_expected
    """

    fig, ax = map_utils.subplots()
    ax.set_title(
        f'Expected percentages of traffic for {service} when starting at {source_region} based on {plan_label}'
    )
    ax.set_ylabel('% dispatch to region')
    ax.set_xlabel('time (minutes)')
    ax.set_xlim(left=0, right=max(times))

    fig_stack, ax_stack = map_utils.subplots()
    ax_stack.set_title(
        f'Expected percentages of traffic for {service} when starting at {source_region} based on {plan_label}'
    )
    ax_stack.set_ylabel('% dispatch to region')
    ax_stack.set_xlabel('time (minutes)')
    ax_stack.set_xlim(left=0, right=max(times))

    # track the bottom values for the bar charts
    bottom = [0] * len(times)

    for region, weight_list in region_weights.items():
        ax.step(times, weight_list, label=region, where='post')

        ax_stack.bar(times, weight_list, bottom=bottom, label=region, width=1)

        # move the bottom up for the next series
        bottom = np.add(bottom, weight_list)

    handles, labels = ax.get_legend_handles_labels()
    lgd = ax.legend(handles,
                    labels,
                    bbox_to_anchor=(1.04, 1),
                    loc="upper left")

    output_name = output_dir / f'overflow_analysis_expected-{plan_label}-{source_region}-{service}.png'
    fig.savefig(output_name.as_posix(),
                format='png',
                bbox_extra_artists=(lgd, ),
                bbox_inches='tight')
    plt.close(fig)

    handles_stack, labels_stack = ax_stack.get_legend_handles_labels()
    lgd_stack = ax_stack.legend(handles_stack,
                                labels_stack,
                                bbox_to_anchor=(1.04, 1),
                                loc="upper left")

    output_name_stack = output_dir / f'overflow_analysis_expected-{plan_label}-{source_region}-{service}_stacked.png'
    fig_stack.savefig(output_name_stack.as_posix(),
                      format='png',
                      bbox_extra_artists=(lgd_stack, ),
                      bbox_inches='tight')
    plt.close(fig_stack)
# Exemple #26
# 0
def process_node(output_dir, data, node):
    """
    Graph network traffic for a single node, per service and direction.

    Writes <node>-network.png (scatter of every service/direction series)
    plus one stacked plot per direction.

    Arguments:
        output_dir(Path): base directory for output
        data(DataFrame): the data to process
        node(str): name of the node to process
    """

    node_name = node.replace(".map.dcomp", "")

    directions = ['RX', 'TX']

    max_minutes = math.ceil(data['time_minutes'].max())

    try:
        fig, ax = map_utils.subplots()
        ax.set_title('network traffic for {0}'.format(node_name))
        ax.set_ylabel('bandwidth (Mbps)')
        ax.set_xlabel('time (minutes)')

        ydata = dict()
        node_data = data.loc[data['node'] == node]
        services = node_data['service'].unique()
        for service in services:
            plot_data = node_data.loc[node_data['service'] == service]

            for direction in directions:
                label = '{0}-{1}'.format(service, direction)
                ax.scatter(plot_data['time_minutes'],
                           plot_data[direction],
                           label=label,
                           s=2)

                service_data = plot_data.drop(columns='time').set_index(
                    'time_minutes')[direction].rename(f"{service} {direction}")
                # group the per-service series by direction for the stacked plots
                ydata.setdefault(direction, list()).append(service_data)

        ax.set_xlim(left=0, right=max_minutes)

        handles, labels = ax.get_legend_handles_labels()
        lgd = ax.legend(handles, labels, bbox_to_anchor=(1, 1))

        output_name = output_dir / '{0}-network.png'.format(node_name)
        # save via the figure object rather than pyplot's current figure
        fig.savefig(output_name.as_posix(),
                    format='png',
                    bbox_extra_artists=(lgd, ),
                    bbox_inches='tight')
        plt.close(fig)

        for direction in directions:
            yd = ydata[direction]
            ydata_combined = pd.concat(yd, axis=1).fillna(0)
            fig_stacked, ax_stacked = map_utils.subplots()
            ax_stacked.set_title(
                f'{direction} network traffic for {node_name}')
            ax_stacked.set_ylabel('bandwidth (mbps)')
            ax_stacked.set_xlabel('time (minutes)')
            ax_stacked.stackplot(ydata_combined.index.values,
                                 ydata_combined.T,
                                 labels=ydata_combined.columns.values)

            ax_stacked.set_xlim(left=0, right=max_minutes)

            handles_stacked, labels_stacked = ax_stacked.get_legend_handles_labels(
            )
            lgd_stacked = ax_stacked.legend(handles_stacked,
                                            labels_stacked,
                                            bbox_to_anchor=(1.04, 1),
                                            loc="upper left")

            output_name_stacked = output_dir / f"{node_name}-network_stacked_{direction}.png"
            fig_stacked.savefig(output_name_stacked.as_posix(),
                                format='png',
                                bbox_extra_artists=(lgd_stacked, ),
                                bbox_inches='tight')
            plt.close(fig_stacked)
    except Exception:
        # a bare except would also swallow KeyboardInterrupt/SystemExit
        get_logger().exception("Unexpected error")
def output_graphs_per_region(first_timestamp, output, capacity, data, label):
    """
    Create a graph per node attribute.
    Creates files with names <label>-<attribute>-<region>.png

    Args:
        first_timestamp(datetime.datetime): when the simulation started
        output (Path): output directory
        capacity (dict): region -> attr -> app -> timestamp -> value
        data (dict): region -> attr -> app -> timestamp -> value
        label (str): used to label the graph and generate the filenames
    """

    first_timestamp_ms = first_timestamp.timestamp() * 1000

    max_minutes = 0
    try:
        for region, region_data in data.items():
            capacity_region = capacity[region]

            for attr, attr_data in region_data.items():
                frames = list()
                fig, ax = map_utils.subplots()
                ax.set_title(f"{attr} {label} in {region}")
                ax.set_xlabel("Time (minutes)")

                stack_xs = None
                stack_ys = list()
                stack_labels = list()
                total_capacity = None
                # QueueLength capacity is a hardcoded large value, so skip it
                if attr in capacity_region and 'QueueLength' != attr:
                    for app, app_data in sorted(capacity_region[attr].items()):
                        if len(app_data) > 0:
                            pairs = sorted(app_data.items())
                            timestamps, values = zip(*pairs)
                            times = [map_utils.timestamp_to_minutes(float(t) - first_timestamp_ms) for t in timestamps]
                            max_minutes = max(max_minutes, max(times))

                            series_label=f"{app} capacity"
                            frames.append(pd.DataFrame(list(values), index=list(times), columns=[series_label]))
                            ax.plot(times, values, label=series_label)

                            # sum capacities across apps, keyed by timestamp
                            if total_capacity is None:
                                total_capacity = app_data.copy()
                            else:
                                total_capacity = {k: total_capacity.get(k, 0) + app_data.get(k, 0) for k in set(total_capacity) | set(app_data)}

                app_timestamps = None
                for app, app_data in sorted(attr_data.items()):
                    if len(app_data) > 0:
                        pairs = sorted(app_data.items())
                        timestamps, values = zip(*pairs)
                        times = [map_utils.timestamp_to_minutes(float(t) - first_timestamp_ms) for t in timestamps]
                        max_minutes = max(max_minutes, max(times))

                        if app_timestamps is None:
                            app_timestamps = timestamps

                        frames.append(pd.DataFrame(list(values), index=list(times), columns=[app]))
                        ax.plot(times, values, label=app)

                        if stack_xs is None:
                            stack_xs = times
                        stack_ys.append(values)
                        stack_labels.append(app)

                ax.set_xlim(left=0, right=max_minutes)
                handles, labels = ax.get_legend_handles_labels()
                lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")

                output_name = output / f"{label}-{attr}-{region}.png"
                fig.savefig(output_name.as_posix(), format='png', bbox_extra_artists=(lgd,), bbox_inches='tight')
                plt.close(fig)

                # write out CSV of the plot data
                df = pd.concat(frames, axis=1)
                df.to_csv(output / f"{label}-{attr}-{region}.csv", index_label="relative minutes")

                if app_timestamps is not None and total_capacity is not None:
                    # create stacked plot
                    fig, ax = map_utils.subplots()
                    ax.set_title(f"{attr} {label} in {region}")
                    ax.set_xlabel("Time (minutes)")
                    ax.set_xlim(left=0, right=max_minutes)
                    ax.stackplot(stack_xs, stack_ys, labels=stack_labels)

                    total_capacity = map_utils.fill_missing_times(app_timestamps, total_capacity)
                    ax.plot(stack_xs, total_capacity, label="Total allocated capacity")

                    handles, labels = ax.get_legend_handles_labels()
                    lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")

                    output_name = output / f"{label}-{attr}-{region}_stacked.png"
                    fig.savefig(output_name.as_posix(), format='png', bbox_extra_artists=(lgd,), bbox_inches='tight')
                    plt.close(fig)
                elif 'QueueLength' != attr:
                    get_logger().warning("Missing data to draw stackplot for %s %s in %s", label, attr, region)

    except Exception:
        # a bare except would also swallow KeyboardInterrupt/SystemExit
        get_logger().exception("Unexpected error")
# Exemple #28
# 0
def output_graphs(first_time, output, data, container_expected_queue_lengths,
                  load_name, container_queue_length_capacity):
    '''
    Graph CPU and queue length per container for each service and log how
    many queued requests fell within the queue length capacity.

    Arguments:
        first_time (int): reference time in ms for run
        output (Path): output folder for graphs
        data: resource report data (app -> attr -> container -> timestamp -> value)
        container_expected_queue_lengths: estimated queue lengths from processing latency csv files
        load_name (str): used to title the graphs and name the output files
        container_queue_length_capacity: queue length capacity for containers, or None
    '''
    _, all_times = find_first_time(data)

    all_times_minutes = [(float(t) - first_time) / 1000.0 / 60.0
                         for t in all_times]
    get_logger().debug("First time %s", first_time)
    max_minutes = max(all_times_minutes)

    for app, app_values in data.items():
        # container -> attr -> timestamp -> value, restricted to the
        # attributes we graph
        container_data = dict()

        for attr, attr_values in app_values.items():
            if attr == "CPU" or attr == "QueueLength":
                for container_name, time_values in attr_values.items():
                    container_data.setdefault(container_name,
                                              dict())[attr] = time_values

        app_total_queue_lengths = 0
        app_total_queue_lengths_within_capacity = 0

        for container_name, attr_data in container_data.items():
            fig, ax = map_utils.subplots()
            ax.set_title(
                f"{load_name} for service: {app}, container: {container_name}")
            ax.set_xlabel('time (minutes)')
            ax.set_xlim(left=0, right=max_minutes)

            queue_lengths = 0
            queue_lengths_within_capacity = 0

            if container_queue_length_capacity:
                # queue values are normalized by capacity below, so 1.0 means
                # "at capacity"; leave headroom above that
                ax.set_ylim(top=2.0)

            for attr, time_values in attr_data.items():
                time_values_minutes = dict()
                for timestamp, value in time_values.items():
                    time = (float(timestamp) - first_time) / 1000.0 / 60.0
                    get_logger().debug("time %s timestamp %s first_time %s",
                                       time, timestamp, first_time)

                    if attr == "QueueLength" and container_queue_length_capacity is not None:
                        queue_lengths += value
                        queue_lengths_within_capacity += min(
                            value, container_queue_length_capacity)
                        time_values_minutes[
                            time] = value / container_queue_length_capacity
                    else:
                        time_values_minutes[time] = value

                # interpolate onto the common time axis so all series line up
                yinterp = map_utils.fill_missing_times(all_times_minutes,
                                                       time_values_minutes)

                if attr == "QueueLength":
                    plt.plot(all_times_minutes,
                             yinterp,
                             label=attr,
                             color='red')
                else:
                    plt.plot(all_times_minutes, yinterp, label=attr)

            get_logger().debug(
                "container_expected_queue_lengths.keys(): %s",
                container_expected_queue_lengths.keys())

            # plot estimated queue lengths
            if app in container_expected_queue_lengths.keys():
                expected = container_expected_queue_lengths[app][container_name]
                times = sorted(expected.keys())
                times_minutes = [(float(time) - first_time) / 1000.0 / 60.0
                                 for time in times]

                # NOTE: this list must NOT be named queue_lengths -- that would
                # clobber the numeric accumulator used in the totals below and
                # raise a TypeError on the += further down
                if container_queue_length_capacity is not None:
                    estimated_queue_lengths = [
                        expected[time] / container_queue_length_capacity
                        for time in times
                    ]
                else:
                    estimated_queue_lengths = [
                        expected[time] for time in times
                    ]

                plt.plot(times_minutes,
                         estimated_queue_lengths,
                         label='Estimated queue length',
                         color='black')

            handles, labels = ax.get_legend_handles_labels()
            lgd = ax.legend(handles,
                            labels,
                            bbox_to_anchor=(1.04, 1),
                            loc="upper left")

            output_name = output / f"container-{load_name}-{app}-{container_name}-CPU_QueueLength.png"
            plt.savefig(output_name.as_posix(),
                        format='png',
                        bbox_extra_artists=(lgd, ),
                        bbox_inches='tight')
            plt.close(fig)

            percent_within = (queue_lengths_within_capacity / queue_lengths
                              if queue_lengths > 0 else float("NaN"))
            get_logger().info(
                "container %s: queue_lengths: %s, queue_lengths_within_capacity: %s, percent queue lengths within capacity: %s",
                container_name, queue_lengths, queue_lengths_within_capacity,
                percent_within)

            app_total_queue_lengths += queue_lengths
            app_total_queue_lengths_within_capacity += queue_lengths_within_capacity

        # guard against division by zero when no queue data was seen
        app_percent_within = (
            app_total_queue_lengths_within_capacity / app_total_queue_lengths
            if app_total_queue_lengths > 0 else float("NaN"))
        get_logger().info(
            "app %s: queue_lengths: %s, queue_lengths_within_capacity: %s, percent queue lengths within capacity: %s",
            app, app_total_queue_lengths,
            app_total_queue_lengths_within_capacity, app_percent_within)
def output_region_capacity_graph_for_app(first_timestamp, output, capacity, app):
    """
    Create a graph per node attribute for the specified service with a series for each region.
    Creates files with names capacity-<attribute>-<app>.png, a matching CSV,
    and a stacked variant capacity-<attribute>-<app>_stacked.png.

    Args:
        first_timestamp (datetime.datetime): when the simulation started
        output (Path): output directory
        capacity (dict): region -> attr -> app -> timestamp -> value
        app (str): name of the service to generate the graph for
    """

    # timestamps in the data are milliseconds since the epoch
    first_timestamp_ms = first_timestamp.timestamp() * 1000
    max_minutes = 0
    try:
        label = 'capacity'

        all_regions = set(capacity.keys())
        all_attrs = set()
        for region_data in capacity.values():
            all_attrs.update(region_data.keys())

        for attr in all_attrs:
            if 'QueueLength' == attr:
                # graphing queue length capacity doesn't work well because we've hardcoded it to be a big number
                continue

            frames = list()
            fig, ax = map_utils.subplots()
            ax.set_title(f"{attr} {label} for {app}")
            ax.set_xlabel("Time (minutes)")

            # accumulated series for the stacked plot
            stack_xs = None
            stack_ys = list()
            stack_labels = list()

            for region in sorted(all_regions):
                region_data = capacity.get(region, dict())
                attr_data = region_data.get(attr, dict())

                app_data = attr_data.get(app, dict())
                if len(app_data) > 0:
                    pairs = sorted(app_data.items())
                    timestamps, values = zip(*pairs)
                    times = [map_utils.timestamp_to_minutes(float(t) - first_timestamp_ms) for t in timestamps]
                    max_minutes = max(max_minutes, max(times))

                    series_label = f"{region} capacity"
                    frames.append(pd.DataFrame(list(values), index=list(times), columns=[series_label]))
                    ax.plot(times, values, label=series_label)

                    # all regions are assumed to share the same sample times;
                    # the first region seen supplies the x-axis for the stack
                    if stack_xs is None:
                        stack_xs = times
                    stack_ys.append(values)
                    stack_labels.append(series_label)

            ax.set_xlim(left=0, right=max_minutes)
            handles, labels = ax.get_legend_handles_labels()
            lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")

            output_name = output / f"{label}-{attr}-{app}.png"
            fig.savefig(output_name.as_posix(), format='png', bbox_extra_artists=(lgd,), bbox_inches='tight')
            plt.close(fig)

            if not frames:
                # no region had data for this attribute/app combination;
                # pd.concat would raise ValueError on an empty list and
                # stackplot cannot handle stack_xs=None
                get_logger().debug("No %s %s data for app %s, skipping CSV and stacked plot", attr, label, app)
                continue

            # write out CSV of the plot data
            df = pd.concat(frames, axis=1)
            df.to_csv(output / f"{label}-{attr}-{app}.csv", index_label="relative minutes")

            # create stacked plot
            fig, ax = map_utils.subplots()
            ax.set_title(f"{attr} {label} for {app}")
            ax.set_xlabel("Time (minutes)")
            ax.set_xlim(left=0, right=max_minutes)
            ax.stackplot(stack_xs, stack_ys, labels=stack_labels)

            handles, labels = ax.get_legend_handles_labels()
            lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")

            output_name = output / f"{label}-{attr}-{app}_stacked.png"
            fig.savefig(output_name.as_posix(), format='png', bbox_extra_artists=(lgd,), bbox_inches='tight')
            plt.close(fig)

    except Exception:
        # narrowed from a bare except so KeyboardInterrupt/SystemExit propagate
        get_logger().exception("Unexpected error")
def output_region_graphs_for_app(first_timestamp, output, capacity, data, label, app):
    """
    Create a graph per node attribute for the specified app with a series for each region.
    Creates files with names <label>-<attribute>-<app>.png, a matching CSV,
    and a stacked variant <label>-<attribute>-<app>_stacked.png. Capacity
    series are overlaid on the line graph for attributes that have one.

    Args:
        first_timestamp (datetime.datetime): when the simulation started
        output (Path): output directory
        capacity (dict): region -> attr -> app -> timestamp -> value
        data (dict): region -> attr -> app -> timestamp -> value
        label (str): used to label the graph and generate the filenames
        app (str): service to generate the graph for
    """

    # timestamps in the data are milliseconds since the epoch
    first_timestamp_ms = first_timestamp.timestamp() * 1000

    max_minutes = 0
    try:
        all_regions = set(capacity.keys())
        all_attrs = set()
        for region_data in data.values():
            all_attrs.update(region_data.keys())

        for attr in all_attrs:
            # accumulated series for the stacked plot (capacity series are
            # deliberately excluded from the stack)
            stack_xs = None
            stack_ys = list()
            stack_labels = list()

            frames = list()
            fig, ax = map_utils.subplots()
            ax.set_title(f"{attr} {label} for {app}")
            ax.set_xlabel("Time (minutes)")

            for region in sorted(all_regions):
                region_data = data.get(region, dict())
                attr_data = region_data.get(attr, dict())
                capacity_region = capacity.get(region, dict())

                # overlay the capacity series when available; queue length
                # capacity is skipped because it is hardcoded to a big number
                if attr in capacity_region and 'QueueLength' != attr:
                    app_data = capacity_region[attr].get(app, dict())
                    if len(app_data) > 0:
                        pairs = sorted(app_data.items())
                        timestamps, values = zip(*pairs)
                        times = [map_utils.timestamp_to_minutes(float(t) - first_timestamp_ms) for t in timestamps]
                        max_minutes = max(max_minutes, max(times))

                        series_label = f"{region} capacity"
                        frames.append(pd.DataFrame(list(values), index=list(times), columns=[series_label]))
                        ax.plot(times, values, label=series_label)

                app_data = attr_data.get(app, dict())
                if len(app_data) > 0:
                    pairs = sorted(app_data.items())
                    timestamps, values = zip(*pairs)
                    times = [map_utils.timestamp_to_minutes(float(t) - first_timestamp_ms) for t in timestamps]
                    max_minutes = max(max_minutes, max(times))

                    frames.append(pd.DataFrame(list(values), index=list(times), columns=[region]))
                    ax.plot(times, values, label=region)

                    # all regions are assumed to share the same sample times;
                    # the first region seen supplies the x-axis for the stack
                    if stack_xs is None:
                        stack_xs = times
                    stack_ys.append(values)
                    stack_labels.append(region)

            ax.set_xlim(left=0, right=max_minutes)
            handles, labels = ax.get_legend_handles_labels()
            lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")

            output_name = output / f"{label}-{attr}-{app}.png"
            fig.savefig(output_name.as_posix(), format='png', bbox_extra_artists=(lgd,), bbox_inches='tight')
            plt.close(fig)

            # write out CSV of the plot data; pd.concat raises ValueError on
            # an empty list, so skip when there was nothing to plot
            if frames:
                df = pd.concat(frames, axis=1)
                df.to_csv(output / f"{label}-{attr}-{app}.csv", index_label="relative minutes")
            else:
                get_logger().debug("No %s %s data for app %s, skipping CSV", attr, label, app)

            # create stacked plot; stack_xs is None when only capacity series
            # (or nothing at all) were found, and stackplot cannot handle that
            if stack_xs is None:
                get_logger().debug("No %s %s series for app %s, skipping stacked plot", attr, label, app)
                continue

            fig, ax = map_utils.subplots()
            ax.set_title(f"{attr} {label} for {app}")
            ax.set_xlabel("Time (minutes)")
            ax.set_xlim(left=0, right=max_minutes)
            ax.stackplot(stack_xs, stack_ys, labels=stack_labels)

            handles, labels = ax.get_legend_handles_labels()
            lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")

            output_name = output / f"{label}-{attr}-{app}_stacked.png"
            fig.savefig(output_name.as_posix(), format='png', bbox_extra_artists=(lgd,), bbox_inches='tight')
            plt.close(fig)

    except Exception:
        # narrowed from a bare except so KeyboardInterrupt/SystemExit propagate
        get_logger().exception("Unexpected error")