Example #1
import numpy as np

import map_utils  # project helper module providing fill_missing_times


def sum_values(data):
    """
    Interpolate values where missing and sum data across NCPs

    Args:
        data (dict): attr -> app -> ncp -> timestamp -> value
    
    Returns:
        dict: attr -> app -> timestamp -> value
    """

    # first compute all times across all data
    all_times = set()
    for attr, attr_data in data.items():
        for app, app_data in attr_data.items():
            for node_name, time_data in app_data.items():
                all_times.update(time_data.keys())
    all_times = sorted(all_times)
    
    result = dict()
    for attr, attr_data in data.items():
        result_attr_data = dict()
        
        for app, app_data in attr_data.items():
            values_summed = np.zeros(len(all_times))
            for node_name, time_data in app_data.items():
                values_ar = map_utils.fill_missing_times(all_times, time_data)
                values_summed = np.add(values_summed, values_ar)
                
            result_attr_data[app] = dict(zip(all_times, values_summed))
        result[attr] = result_attr_data
    return result
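map_utils.fill_missing_times is a project helper whose implementation is not shown; the sketch below substitutes a hypothetical linear-interpolation stand-in (an assumption) and exercises sum_values on a tiny input:

import types

import numpy as np


def _fill_missing_times(all_times, time_data):
    # Assumed behavior of the project helper: linearly interpolate a sparse
    # timestamp -> value mapping onto the shared time axis.
    known_times, known_values = zip(*sorted(time_data.items()))
    return np.interp(all_times, known_times, known_values)


# Rebind the module-level name so sum_values picks up the stand-in.
map_utils = types.SimpleNamespace(fill_missing_times=_fill_missing_times)

# attr -> app -> ncp -> timestamp -> value; ncpA has no sample at t=10
data = {'CPU': {'app1': {'ncpA': {0: 1.0, 20: 3.0},
                         'ncpB': {0: 2.0, 10: 2.0, 20: 2.0}}}}
print(sum_values(data))
# ncpA interpolates to 2.0 at t=10, giving sums {0: 3.0, 10: 4.0, 20: 5.0}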
Example #2
import matplotlib.pyplot as plt
import pandas as pd

import map_utils  # project helpers: subplots, fill_missing_times
# get_logger() is assumed to come from the project's logging setup


def process_file(first_timestamp_ms, output, csv_file):
    ncp = csv_file.stem

    get_logger().debug("Processing %s ncp: %s", csv_file, ncp)

    df = pd.read_csv(csv_file)
    df['minutes'] = (df['timestamp'] - first_timestamp_ms) / 1000 / 60
    df = df.sort_values(by=['minutes'])

    max_minutes = df['minutes'].max()
    services = df['service'].unique()
    source_regions = df['source region'].unique()
    attributes = df['attribute'].unique()

    xdata = df['minutes'].unique()
    for service in sorted(services):
        get_logger().debug("Service: %s", service)
        for attr in attributes:
            get_logger().debug("attribute: %s", attr)

            fig, ax = map_utils.subplots()
            ax.set_title(f"Inferred demand for {service} and {attr} on {ncp}")
            ax.set_xlabel('time (minutes)')
            ax.set_xlim(left=0, right=max_minutes)

            ydata = list()
            labels = list()
            for source_region in sorted(source_regions):
                get_logger().debug("source region: %s", source_region)

                plot_data = df.loc[(df['service'] == service)
                                   & (df['source region'] == source_region) &
                                   (df['attribute'] == attr)]
                label = source_region
                time_data = pd.Series(plot_data['value'].values,
                                      index=plot_data['minutes']).to_dict()
                yfilled = map_utils.fill_missing_times(xdata, time_data)

                get_logger().debug("yseries len: %d", len(yfilled))
                ydata.append(yfilled)
                labels.append(label)

            get_logger().debug("xdata len: %d", len(xdata))
            get_logger().debug("ydata len: %d", len(ydata))
            ax.stackplot(xdata, ydata, labels=labels)
            handles, labels = ax.get_legend_handles_labels()
            lgd = ax.legend(handles,
                            labels,
                            bbox_to_anchor=(1.04, 1),
                            loc="upper left")

            output_name = output / f"{ncp}-{service}-{attr}.png"
            fig.savefig(output_name.as_posix(),
                        format='png',
                        bbox_extra_artists=(lgd, ),
                        bbox_inches='tight')
            plt.close(fig)
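A hypothetical driver, assuming a directory of per-NCP CSV files containing the columns the function reads (timestamp, service, source region, attribute, value):

from pathlib import Path

input_dir = Path('ncp_csvs')    # hypothetical location of <ncp>.csv files
output_dir = Path('graphs')
output_dir.mkdir(parents=True, exist_ok=True)

first_timestamp_ms = 1_600_000_000_000  # run start in epoch milliseconds

for csv_file in sorted(input_dir.glob('*.csv')):
    process_file(first_timestamp_ms, output_dir, csv_file)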
Example #3
import numpy as np

import map_utils  # project helper module providing fill_missing_times


def sum_capacity_values_by_region(data, node_region, container_node):
    """
    Interpolate capacity values where missing and sum data per region.

    Args:
        data (dict): attr -> app -> container -> timestamp -> value
        node_region (dict): ncp -> region
        container_node (dict): container -> ncp
    
    Returns:
        dict: region -> attr -> app -> timestamp -> value
    """

    # first compute all times across all data
    all_times = set()
    for attr, attr_data in data.items():
        for app, app_data in attr_data.items():
            for container_name, time_data in app_data.items():
                all_times.update(time_data.keys())
    all_times = sorted(all_times)

    regions = set(node_region.values())
    
    result = dict()
    for sum_region in regions:
        region_data = dict()
        
        for attr, attr_data in data.items():
            result_attr_data = dict()

            for app, app_data in attr_data.items():
                values_summed = np.zeros(len(all_times))

                for container_name, time_data in app_data.items():
                    ncp = container_node[container_name]
                    region = node_region[ncp]

                    if region == sum_region:
                        values_ar = map_utils.fill_missing_times(all_times, time_data)
                        values_summed = np.add(values_summed, values_ar)

                result_attr_data[app] = dict(zip(all_times, values_summed))
            region_data[attr] = result_attr_data
        result[sum_region] = region_data
        
    return result
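With the same fill_missing_times stand-in from Example #1 in scope, a usage sketch over a hypothetical two-region topology:

# Hypothetical topology: one container per NCP, one NCP per region
node_region = {'nodeA': 'regionX', 'nodeB': 'regionY'}
container_node = {'c1': 'nodeA', 'c2': 'nodeB'}

# attr -> app -> container -> timestamp -> value
data = {'CPU': {'app1': {'c1': {0: 1.0, 10: 2.0},
                         'c2': {0: 4.0, 10: 4.0}}}}

by_region = sum_capacity_values_by_region(data, node_region, container_node)
# by_region['regionX']['CPU']['app1'] -> {0: 1.0, 10: 2.0}
# by_region['regionY']['CPU']['app1'] -> {0: 4.0, 10: 4.0}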
Example #4
import matplotlib.pyplot as plt

import map_utils  # project helpers: subplots, fill_missing_times
# get_logger() and find_all_times() are assumed project helpers;
# a plausible sketch of find_all_times follows this example


def output_graphs(first_timestamp_ms, output, data, load_name):
    all_times = find_all_times(data)
    all_times_minutes = [(float(t) - first_timestamp_ms) / 1000.0 / 60.0
                         for t in all_times]
    max_minutes = max(all_times_minutes)

    for app, app_values in data.items():
        for attr, attr_values in app_values.items():
            fig, ax = map_utils.subplots()
            ax.set_title(f"{load_name} for service: {app} attribute: {attr}")
            ax.set_xlabel('time (minutes)')
            ax.set_xlim(left=0, right=max_minutes)

            plot_data = dict()
            for container_name, time_values in attr_values.items():
                time_values_minutes = dict()
                for timestamp, value in time_values.items():
                    time = (float(timestamp) -
                            first_timestamp_ms) / 1000.0 / 60.0
                    get_logger().debug("time %s timestamp %s first_time %s",
                                       time, timestamp, first_timestamp_ms)
                    time_values_minutes[time] = value

                yinterp = map_utils.fill_missing_times(all_times_minutes,
                                                       time_values_minutes)

                plot_data[container_name] = yinterp

            series_labels, ydata = zip(*sorted(plot_data.items()))
            ax.stackplot(all_times_minutes, ydata, labels=series_labels)

            handles, labels = ax.get_legend_handles_labels()
            lgd = ax.legend(handles,
                            labels,
                            bbox_to_anchor=(1.04, 1),
                            loc="upper left")

            output_name = output / f"container-{load_name}-{app}-{attr}.png"
            fig.savefig(output_name.as_posix(),
                        format='png',
                        bbox_extra_artists=(lgd, ),
                        bbox_inches='tight')
            plt.close(fig)
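find_all_times is referenced but not shown; a plausible sketch consistent with its use here, collecting every timestamp across the app -> attr -> container -> timestamp -> value nesting:

def find_all_times(data):
    # Assumed behavior: gather the sorted union of timestamps across
    # app -> attr -> container -> timestamp -> value.
    all_times = set()
    for app_values in data.values():
        for attr_values in app_values.values():
            for time_values in attr_values.values():
                all_times.update(time_values.keys())
    return sorted(all_times)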
Example #5
import matplotlib.pyplot as plt

import map_utils  # project helpers: subplots, fill_missing_times
# get_logger() and find_first_time() are assumed project helpers;
# a plausible sketch of find_first_time follows this example


def output_graphs(first_time, output, data, container_expected_queue_lengths,
                  load_name, container_queue_length_capacity):
    '''
    first_time (int): reference time in ms for the run
    output (Path): output folder for graphs
    data (dict): resource report data
    container_expected_queue_lengths: estimated queue lengths from the processing latency csv files
    load_name (str): label used in graph titles and output filenames
    container_queue_length_capacity: queue length capacity for containers, or None
    '''
    _, all_times = find_first_time(data)

    all_times_minutes = [(float(t) - first_time) / 1000.0 / 60.0
                         for t in all_times]
    get_logger().debug("First time %s", first_time)
    max_minutes = max(all_times_minutes)

    for app, app_values in data.items():
        container_data = dict()

        for attr, attr_values in app_values.items():
            if attr == "CPU" or attr == "QueueLength":
                for container_name, time_values in attr_values.items():
                    container_data.setdefault(container_name,
                                              dict())[attr] = time_values

        app_total_queue_lengths = 0
        app_total_queue_lengths_within_capacity = 0

        for container_name, attr_data in container_data.items():
            fig, ax = map_utils.subplots()
            ax.set_title(
                f"{load_name} for service: {app}, container: {container_name}")
            ax.set_xlabel('time (minutes)')
            ax.set_xlim(left=0, right=max_minutes)

            queue_lengths = 0
            queue_lengths_within_capacity = 0

            if container_queue_length_capacity:
                ax.set_ylim(top=2.0)

            for attr, time_values in attr_data.items():
                time_values_minutes = dict()
                for timestamp, value in time_values.items():
                    time = (float(timestamp) - first_time) / 1000.0 / 60.0
                    get_logger().debug("time %s timestamp %s first_time %s",
                                       time, timestamp, first_time)

                    if attr == "QueueLength" and container_queue_length_capacity != None:
                        queue_lengths += value
                        queue_lengths_within_capacity += min(
                            value, container_queue_length_capacity)
                        time_values_minutes[
                            time] = value / container_queue_length_capacity
                    else:
                        time_values_minutes[time] = value

                yinterp = map_utils.fill_missing_times(all_times_minutes,
                                                       time_values_minutes)

                # times/values feed only the commented-out scatter alternatives
                times = sorted(time_values_minutes)
                values = [time_values_minutes[t] for t in times]

                if attr == "QueueLength":
                    #plt.scatter(times, values, label=attr, color='red', marker='o')
                    plt.plot(all_times_minutes,
                             yinterp,
                             label=attr,
                             color='red')
                else:
                    #plt.scatter(times, values, label=attr, marker='o')
                    plt.plot(all_times_minutes, yinterp, label=attr)

                get_logger().debug(
                    "container_expected_queue_lengths.keys(): %s",
                    container_expected_queue_lengths.keys())

            # plot estimated queue lengths (use a distinct name so the
            # numeric queue_lengths accumulator above is not clobbered)
            if app in container_expected_queue_lengths:
                expected = container_expected_queue_lengths[app][container_name]
                times = sorted(expected.keys())
                times_minutes = [(float(time) - first_time) / 1000.0 / 60.0
                                 for time in times]

                if container_queue_length_capacity is not None:
                    estimated_queue_lengths = [
                        expected[time] / container_queue_length_capacity
                        for time in times
                    ]
                else:
                    estimated_queue_lengths = [expected[time] for time in times]

                ax.plot(times_minutes,
                        estimated_queue_lengths,
                        label='Estimated queue length',
                        color='black')

            handles, labels = ax.get_legend_handles_labels()
            lgd = ax.legend(handles,
                            labels,
                            bbox_to_anchor=(1.04, 1),
                            loc="upper left")

            output_name = output / f"container-{load_name}-{app}-{container_name}-CPU_QueueLength.png"
            fig.savefig(output_name.as_posix(),
                        format='png',
                        bbox_extra_artists=(lgd, ),
                        bbox_inches='tight')
            plt.close(fig)

            get_logger().info(
                'container %s: queue_lengths: %s, queue_lengths_within_capacity: %s, '
                'fraction within capacity: %s', container_name, queue_lengths,
                queue_lengths_within_capacity,
                queue_lengths_within_capacity /
                queue_lengths if queue_lengths > 0 else float("NaN"))

            app_total_queue_lengths += queue_lengths
            app_total_queue_lengths_within_capacity += queue_lengths_within_capacity

        get_logger().info(
            'app %s: queue_lengths: %s, queue_lengths_within_capacity: %s, '
            'fraction within capacity: %s', app, app_total_queue_lengths,
            app_total_queue_lengths_within_capacity,
            app_total_queue_lengths_within_capacity / app_total_queue_lengths
            if app_total_queue_lengths > 0 else float("NaN"))
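find_first_time is likewise not shown, and the call above discards its first return value; a plausible sketch, assuming it returns the earliest timestamp together with the sorted list of all timestamps:

def find_first_time(data):
    # Assumed behavior: earliest timestamp plus the sorted union of all
    # timestamps in data (app -> attr -> container -> timestamp -> value).
    all_times = set()
    for app_values in data.values():
        for attr_values in app_values.values():
            for time_values in attr_values.values():
                all_times.update(time_values.keys())
    all_times = sorted(all_times)
    return all_times[0], all_times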
Example #6
import matplotlib.pyplot as plt
import pandas as pd

import map_utils  # project helpers: subplots, timestamp_to_minutes, fill_missing_times
# get_logger() is assumed to come from the project's logging setup


def output_graphs_per_region(first_timestamp, output, capacity, data, label):
    """
    Create a graph per node attribute.
    Creates files with names <label>-<attribute>-<region>.png
    
    Args:
        first_timestamp (datetime.datetime): when the simulation started
        output (Path): output directory
        capacity (dict): region -> attr -> app -> timestamp -> value
        data (dict): region -> attr -> app -> timestamp -> value
        label (str): used to label the graph and generate the filenames
    """

    first_timestamp_ms = first_timestamp.timestamp() * 1000

    max_minutes = 0
    try:
        for region, region_data in data.items():
            capacity_region = capacity[region]
            
            for attr, attr_data in region_data.items():
                frames = list()
                fig, ax = map_utils.subplots()
                ax.set_title(f"{attr} {label} in {region}")
                ax.set_xlabel("Time (minutes)")
                
                stack_xs = None
                stack_ys = list()
                stack_labels = list()
                total_capacity = None
                if attr in capacity_region and attr != 'QueueLength':
                    for app, app_data in sorted(capacity_region[attr].items()):
                        if len(app_data) > 0:
                            pairs = sorted(app_data.items())
                            timestamps, values = zip(*pairs)
                            times = [map_utils.timestamp_to_minutes(float(t) - first_timestamp_ms) for t in timestamps]
                            max_minutes = max(max_minutes, max(times))

                            series_label = f"{app} capacity"
                            frames.append(pd.DataFrame(list(values), index=list(times), columns=[series_label]))
                            ax.plot(times, values, label=series_label)

                            if total_capacity is None:
                                total_capacity = app_data.copy()
                            else:
                                total_capacity = {k: total_capacity.get(k, 0) + app_data.get(k, 0) for k in set(total_capacity) | set(app_data)}

                app_timestamps = None
                for app, app_data in sorted(attr_data.items()):
                    if len(app_data) > 0:
                        pairs = sorted(app_data.items())
                        timestamps, values = zip(*pairs)
                        times = [map_utils.timestamp_to_minutes(float(t) - first_timestamp_ms) for t in timestamps]
                        max_minutes = max(max_minutes, max(times))

                        if app_timestamps is None:
                            app_timestamps = timestamps
                        
                        frames.append(pd.DataFrame(list(values), index=list(times), columns=[app]))
                        ax.plot(times, values, label=app)

                        if stack_xs is None:
                            stack_xs = times
                        stack_ys.append(values)
                        stack_labels.append(app)

                ax.set_xlim(left=0, right=max_minutes)                        
                handles, labels = ax.get_legend_handles_labels()
                lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")

                output_name = output / f"{label}-{attr}-{region}.png"
                fig.savefig(output_name.as_posix(), format='png', bbox_extra_artists=(lgd,), bbox_inches='tight')
                plt.close(fig)
                
                # write out CSV of the plot data
                df = pd.concat(frames, axis=1)
                df.to_csv(output / f"{label}-{attr}-{region}.csv", index_label="relative minutes")

                if app_timestamps is not None and total_capacity is not None:
                    # create stacked plot
                    fig, ax = map_utils.subplots()
                    ax.set_title(f"{attr} {label} in {region}")
                    ax.set_xlabel("Time (minutes)")
                    ax.set_xlim(left=0, right=max_minutes)                        
                    ax.stackplot(stack_xs, stack_ys, labels=stack_labels)

                    total_capacity = map_utils.fill_missing_times(app_timestamps, total_capacity)
                    ax.plot(stack_xs, total_capacity, label="Total allocated capacity")

                    handles, labels = ax.get_legend_handles_labels()
                    lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")

                    output_name = output / f"{label}-{attr}-{region}_stacked.png"
                    fig.savefig(output_name.as_posix(), format='png', bbox_extra_artists=(lgd,), bbox_inches='tight')
                    plt.close(fig)
                elif attr != 'QueueLength':
                    get_logger().warning("Missing data to draw stackplot for %s %s in %s", label, attr, region)
                
    except Exception:
        get_logger().exception("Unexpected error")
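A usage sketch with hypothetical region and app names, assuming the real map_utils helpers (subplots, timestamp_to_minutes, fill_missing_times) are importable:

import datetime
from pathlib import Path

first_timestamp = datetime.datetime(2020, 1, 1, tzinfo=datetime.timezone.utc)
base_ms = first_timestamp.timestamp() * 1000

# region -> attr -> app -> timestamp -> value (capacity has the same shape)
data = {'regionX': {'CPU': {'app1': {base_ms: 1.0, base_ms + 60_000: 2.0}}}}
capacity = {'regionX': {'CPU': {'app1': {base_ms: 4.0, base_ms + 60_000: 4.0}}}}

out = Path('region-graphs')
out.mkdir(exist_ok=True)
output_graphs_per_region(first_timestamp, out, capacity, data, 'load')
# writes load-CPU-regionX.png, load-CPU-regionX.csv, load-CPU-regionX_stacked.png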