def sum_values(data):
    """
    Interpolate values where missing and sum data across NCPs

    Args:
        data (dict): attr -> app -> ncp -> timestamp -> value
    Returns:
        dict: attr -> app -> timestamp -> value
    """
    # gather the union of every timestamp seen in any series
    all_times = set()
    for attr_data in data.values():
        for app_data in attr_data.values():
            for time_data in app_data.values():
                all_times.update(time_data.keys())
    all_times = sorted(all_times)

    result = dict()
    for attr, attr_data in data.items():
        per_app = dict()
        for app, app_data in attr_data.items():
            totals = np.zeros(len(all_times))
            for time_data in app_data.values():
                # interpolate each NCP series onto the common time axis, then accumulate
                filled = map_utils.fill_missing_times(all_times, time_data)
                totals = np.add(totals, filled)
            per_app[app] = dict(zip(all_times, totals))
        result[attr] = per_app
    return result
def process_file(first_timestamp_ms, output, csv_file):
    """
    Plot inferred demand for one NCP csv file, stacked by source region.

    One png per (service, attribute) pair is written to the output directory,
    named <ncp>-<service>-<attr>.png.

    Args:
        first_timestamp_ms: reference time in ms treated as minute zero
        output (Path): directory the png files are written into
        csv_file (Path): csv whose stem names the NCP; expected columns include
            timestamp, service, source region, attribute, value
    """
    ncp = csv_file.stem
    get_logger().debug("Processing %s ncp: %s", csv_file, ncp)

    df = pd.read_csv(csv_file)
    # convert absolute millisecond timestamps to minutes relative to the run start
    df['minutes'] = (df['timestamp'] - first_timestamp_ms) / 1000 / 60
    df = df.sort_values(by=['minutes'])

    max_minutes = df['minutes'].max()
    services = df['service'].unique()
    source_regions = df['source region'].unique()
    attributes = df['attribute'].unique()
    xdata = df['minutes'].unique()

    for service in sorted(services):
        get_logger().debug("Service: %s", service)
        for attr in attributes:
            get_logger().debug("attribute: %s", attr)

            fig, ax = map_utils.subplots()
            ax.set_title(f"Inferred demand for {service} and {attr} on {ncp}")
            ax.set_xlabel('time (minutes)')
            ax.set_xlim(left=0, right=max_minutes)

            ydata = list()
            labels = list()
            for source_region in sorted(source_regions):
                get_logger().debug("source region: %s", source_region)
                mask = ((df['service'] == service)
                        & (df['source region'] == source_region)
                        & (df['attribute'] == attr))
                plot_data = df.loc[mask]
                time_data = pd.Series(plot_data['value'].values,
                                      index=plot_data['minutes']).to_dict()
                # interpolate so every region series covers the full time axis
                yfilled = map_utils.fill_missing_times(xdata, time_data)
                get_logger().debug("yseries len: %d", len(yfilled))
                ydata.append(yfilled)
                labels.append(source_region)

            get_logger().debug("xdata len: %d", len(xdata))
            get_logger().debug("ydata len: %d", len(ydata))
            ax.stackplot(xdata, ydata, labels=labels)

            handles, labels = ax.get_legend_handles_labels()
            lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")
            output_name = output / f"{ncp}-{service}-{attr}.png"
            fig.savefig(output_name.as_posix(), format='png',
                        bbox_extra_artists=(lgd, ), bbox_inches='tight')
            plt.close(fig)
def sum_capacity_values_by_region(data, node_region, container_node):
    """
    Interpolate capacity values where missing and sum data per region.

    Args:
        data (dict): attr -> app -> ncp -> timestamp -> value
        node_region (dict): ncp -> region
        container_node (dict): container -> ncp

    Returns:
        dict: region -> attr -> app -> timestamp -> value
    """
    # gather the union of every timestamp seen in any series
    all_times = set()
    for attr_data in data.values():
        for app_data in attr_data.values():
            for time_data in app_data.values():
                all_times.update(time_data.keys())
    all_times = sorted(all_times)

    result = dict()
    for sum_region in set(node_region.values()):
        region_result = dict()
        for attr, attr_data in data.items():
            attr_result = dict()
            for app, app_data in attr_data.items():
                totals = np.zeros(len(all_times))
                for container_name, time_data in app_data.items():
                    # map container -> hosting ncp -> region; only sum
                    # containers that live in the region being accumulated
                    if node_region[container_node[container_name]] == sum_region:
                        filled = map_utils.fill_missing_times(all_times, time_data)
                        totals = np.add(totals, filled)
                attr_result[app] = dict(zip(all_times, totals))
            region_result[attr] = attr_result
        result[sum_region] = region_result
    return result
def output_graphs(first_timestamp_ms, output, data, load_name):
    """
    Write one stacked graph per (app, attribute) of per-container values.

    Files are named container-<load_name>-<app>-<attr>.png.

    Args:
        first_timestamp_ms: reference time in ms treated as minute zero
        output (Path): output directory
        data (dict): app -> attr -> container -> timestamp -> value
        load_name (str): used in the title and the output file names
    """
    all_times = find_all_times(data)
    all_times_minutes = [(float(t) - first_timestamp_ms) / 1000.0 / 60.0 for t in all_times]
    max_minutes = max(all_times_minutes)

    for app, app_values in data.items():
        for attr, attr_values in app_values.items():
            fig, ax = map_utils.subplots()
            ax.set_title(f"{load_name} for service: {app} attribute: {attr}")
            ax.set_xlabel('time (minutes)')
            ax.set_xlim(left=0, right=max_minutes)

            plot_data = dict()
            for container_name, time_values in attr_values.items():
                # convert each series' timestamps to relative minutes
                time_values_minutes = dict()
                for timestamp, value in time_values.items():
                    time = (float(timestamp) - first_timestamp_ms) / 1000.0 / 60.0
                    get_logger().debug("time %s timestamp %s first_time %s", time,
                                       timestamp, first_timestamp_ms)
                    time_values_minutes[time] = value
                # interpolate so every container series covers the full time axis
                yinterp = map_utils.fill_missing_times(all_times_minutes, time_values_minutes)
                plot_data[container_name] = yinterp

            # sort by container name so the stacking order is deterministic
            series_labels, ydata = zip(*sorted(plot_data.items()))
            # draw/save on the explicit fig/ax objects instead of the implicit
            # pyplot "current figure" -- consistent with the other plot
            # functions in this file and safe if another figure is current
            ax.stackplot(all_times_minutes, ydata, labels=series_labels)

            handles, labels = ax.get_legend_handles_labels()
            lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")
            output_name = output / f"container-{load_name}-{app}-{attr}.png"
            fig.savefig(output_name.as_posix(), format='png',
                        bbox_extra_artists=(lgd, ), bbox_inches='tight')
            plt.close(fig)
def output_graphs(first_time, output, data, container_expected_queue_lengths, load_name, container_queue_length_capacity):
    '''
    Plot CPU and QueueLength per container, overlay the estimated queue
    length, and log how much of the queue length stayed within capacity.

    first_time (int): reference time in ms for run
    output: output folder for graphs
    data: resource report data (app -> attr -> container -> timestamp -> value)
    container_expected_queue_lengths: estimated queue lengths from processing latency csv files
        (app -> container -> timestamp -> value)
    container_queue_length_capacity: queue length capacity for containers, or None
    '''
    _, all_times = find_first_time(data)
    all_times_minutes = [(float(t) - first_time) / 1000.0 / 60.0 for t in all_times]
    get_logger().debug("First time %s", first_time)
    max_minutes = max(all_times_minutes)

    for app, app_values in data.items():
        # regroup so each container has its CPU and QueueLength series together
        container_data = dict()
        for attr, attr_values in app_values.items():
            if attr == "CPU" or attr == "QueueLength":
                for container_name, time_values in attr_values.items():
                    container_data.setdefault(container_name, dict())[attr] = time_values

        app_total_queue_lengths = 0
        app_total_queue_lengths_within_capacity = 0
        for container_name, attr_data in container_data.items():
            fig, ax = map_utils.subplots()
            ax.set_title(f"{load_name} for service: {app}, container: {container_name}")
            ax.set_xlabel('time (minutes)')
            ax.set_xlim(left=0, right=max_minutes)

            queue_lengths = 0
            queue_lengths_within_capacity = 0
            if container_queue_length_capacity:
                # queue lengths are plotted as a fraction of capacity, so 1.0 == at capacity
                ax.set_ylim(top=2.0)

            for attr, time_values in attr_data.items():
                time_values_minutes = dict()
                for timestamp, value in time_values.items():
                    time = (float(timestamp) - first_time) / 1000.0 / 60.0
                    get_logger().debug("time %s timestamp %s first_time %s", time, timestamp, first_time)
                    if attr == "QueueLength" and container_queue_length_capacity is not None:
                        queue_lengths += value
                        queue_lengths_within_capacity += min(value, container_queue_length_capacity)
                        # normalize by capacity for plotting
                        time_values_minutes[time] = value / container_queue_length_capacity
                    else:
                        time_values_minutes[time] = value

                # interpolate onto the common time axis so all series align
                yinterp = map_utils.fill_missing_times(all_times_minutes, time_values_minutes)
                if attr == "QueueLength":
                    plt.plot(all_times_minutes, yinterp, label=attr, color='red')
                else:
                    plt.plot(all_times_minutes, yinterp, label=attr)

            get_logger().debug('container_expected_queue_lengths.keys(): %s',
                               container_expected_queue_lengths.keys())

            # plot estimated queue lengths
            # BUGFIX: the original rebound the integer accumulator
            # `queue_lengths` to this list, which made the totals/percentage
            # computations below raise TypeError; also guard the container
            # lookup, which could otherwise raise KeyError
            if (app in container_expected_queue_lengths
                    and container_name in container_expected_queue_lengths[app]):
                estimated = container_expected_queue_lengths[app][container_name]
                times = sorted(estimated.keys())
                times_minutes = [(float(time) - first_time) / 1000.0 / 60.0 for time in times]
                if container_queue_length_capacity is not None:
                    estimated_values = [estimated[time] / container_queue_length_capacity
                                        for time in times]
                else:
                    estimated_values = [estimated[time] for time in times]
                plt.plot(times_minutes, estimated_values,
                         label='Estimated queue length', color='black')

            handles, labels = ax.get_legend_handles_labels()
            lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")
            output_name = output / f"container-{load_name}-{app}-{container_name}-CPU_QueueLength.png"
            plt.savefig(output_name.as_posix(), format='png',
                        bbox_extra_artists=(lgd, ), bbox_inches='tight')
            plt.close(fig)

            percent_within = (queue_lengths_within_capacity / queue_lengths
                              if queue_lengths > 0 else float("NaN"))
            get_logger().info(
                f'container {container_name}: queue_lengths: {queue_lengths}, queue_lengths_within_capacity: {queue_lengths_within_capacity}, percent queue lengths within capacity: {percent_within}')
            app_total_queue_lengths += queue_lengths
            app_total_queue_lengths_within_capacity += queue_lengths_within_capacity

        # guard against division by zero when no queue lengths were observed
        app_percent_within = (app_total_queue_lengths_within_capacity / app_total_queue_lengths
                              if app_total_queue_lengths > 0 else float("NaN"))
        get_logger().info(
            f'app {app}: queue_lengths: {app_total_queue_lengths}, queue_lengths_within_capacity: {app_total_queue_lengths_within_capacity}, percent queue lengths within capacity: {app_percent_within}')
def output_graphs_per_region(first_timestamp, output, capacity, data, label):
    """
    Create a graph per node attribute.

    Creates files with names <label>-<attribute>-<region>.png, a CSV of the
    plotted data, and (when capacity data is present) a stacked variant named
    <label>-<attribute>-<region>_stacked.png.

    Args:
        first_timestamp (datetime.datetime): when the simulation started
        output (Path): output directory
        capacity (dict): region -> attr -> app -> timestamp -> value
        data (dict): region -> attr -> app -> timestamp -> value
        label (str): used to label the graph and generate the filenames
    """
    first_timestamp_ms = first_timestamp.timestamp() * 1000
    max_minutes = 0
    try:
        for region, region_data in data.items():
            capacity_region = capacity[region]
            for attr, attr_data in region_data.items():
                frames = list()
                fig, ax = map_utils.subplots()
                ax.set_title(f"{attr} {label} in {region}")
                ax.set_xlabel("Time (minutes)")

                stack_xs = None
                stack_ys = list()
                stack_labels = list()
                total_capacity = None

                # plot per-app capacity lines; QueueLength has no capacity graph
                if attr in capacity_region and 'QueueLength' != attr:
                    for app, app_data in sorted(capacity_region[attr].items()):
                        if len(app_data) > 0:
                            pairs = sorted(app_data.items())
                            timestamps, values = zip(*pairs)
                            times = [map_utils.timestamp_to_minutes(float(t) - first_timestamp_ms)
                                     for t in timestamps]
                            max_minutes = max(max_minutes, max(times))
                            series_label = f"{app} capacity"
                            frames.append(pd.DataFrame(list(values), index=list(times),
                                                       columns=[series_label]))
                            ax.plot(times, values, label=series_label)
                            # accumulate total capacity across apps, keyed by timestamp
                            if total_capacity is None:
                                total_capacity = app_data.copy()
                            else:
                                total_capacity = {k: total_capacity.get(k, 0) + app_data.get(k, 0)
                                                  for k in set(total_capacity) | set(app_data)}

                app_timestamps = None
                for app, app_data in sorted(attr_data.items()):
                    if len(app_data) > 0:
                        pairs = sorted(app_data.items())
                        timestamps, values = zip(*pairs)
                        times = [map_utils.timestamp_to_minutes(float(t) - first_timestamp_ms)
                                 for t in timestamps]
                        max_minutes = max(max_minutes, max(times))
                        if app_timestamps is None:
                            # NOTE(review): the first app's timestamps are reused below to
                            # interpolate total capacity onto stack_xs; this assumes all
                            # apps share the same time base -- confirm with the data source
                            app_timestamps = timestamps
                        frames.append(pd.DataFrame(list(values), index=list(times), columns=[app]))
                        ax.plot(times, values, label=app)
                        if stack_xs is None:
                            stack_xs = times
                        stack_ys.append(values)
                        stack_labels.append(app)

                ax.set_xlim(left=0, right=max_minutes)
                handles, labels = ax.get_legend_handles_labels()
                lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")
                output_name = output / f"{label}-{attr}-{region}.png"
                fig.savefig(output_name.as_posix(), format='png',
                            bbox_extra_artists=(lgd,), bbox_inches='tight')
                plt.close(fig)

                # write out CSV of the plot data; pd.concat raises ValueError on
                # an empty list, so only write when something was plotted
                if frames:
                    df = pd.concat(frames, axis=1)
                    df.to_csv(output / f"{label}-{attr}-{region}.csv",
                              index_label="relative minutes")

                if app_timestamps is not None and total_capacity is not None:
                    # create stacked plot
                    fig, ax = map_utils.subplots()
                    ax.set_title(f"{attr} {label} in {region}")
                    ax.set_xlabel("Time (minutes)")
                    ax.set_xlim(left=0, right=max_minutes)
                    ax.stackplot(stack_xs, stack_ys, labels=stack_labels)
                    total_capacity = map_utils.fill_missing_times(app_timestamps, total_capacity)
                    ax.plot(stack_xs, total_capacity, label="Total allocated capacity")
                    handles, labels = ax.get_legend_handles_labels()
                    lgd = ax.legend(handles, labels, bbox_to_anchor=(1.04, 1), loc="upper left")
                    output_name = output / f"{label}-{attr}-{region}_stacked.png"
                    fig.savefig(output_name.as_posix(), format='png',
                                bbox_extra_artists=(lgd,), bbox_inches='tight')
                    plt.close(fig)
                elif 'QueueLength' != attr:
                    get_logger().warning("Missing data to draw stackplot for %s %s in %s",
                                         label, attr, region)
    except Exception:
        # best-effort graphing: log and continue, but do not swallow
        # SystemExit/KeyboardInterrupt like the original bare except did
        get_logger().exception("Unexpected error")