def process_logfile(writer, node, agent_logfile):
    """Parse one agent log and write a CSV row for each completed image pull."""
    initial_pull_timestamp = None
    pull_attempts = 0
    current_image = None
    with open(agent_logfile, 'r') as f:
        for line in f:
            pull_match = pull_re.match(line)
            pull_fail_match = pull_fail_re.match(line)
            pull_result_match = pull_result_re.match(line)
            if pull_match is not None:
                initial_pull_timestamp = map_utils.log_timestamp_to_datetime(pull_match.group('timestamp'))
                current_image = pull_match.group('image')
                pull_attempts = 1
            elif pull_fail_match is not None:
                pull_attempts = pull_attempts + 1
            elif pull_result_match is not None:
                final_timestamp = map_utils.log_timestamp_to_datetime(pull_result_match.group('timestamp'))
                duration = final_timestamp - initial_pull_timestamp
                writer.writerow([node, initial_pull_timestamp, final_timestamp,
                                 duration.total_seconds(), pull_attempts,
                                 pull_result_match.group('result'), current_image])
                initial_pull_timestamp = None
                pull_attempts = 0
                current_image = None
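# Hedged sketch (not from the source): process_logfile above relies on
# module-level patterns defined elsewhere in the repo. The only requirements
# implied by the code are the group names it uses: 'timestamp' and 'image'
# for pull_re, 'timestamp' and 'result' for pull_result_re. The real patterns
# almost certainly differ from these placeholders.
pull_re = re.compile(r'(?P<timestamp>\S+).*[Pp]ulling image (?P<image>\S+)')
pull_fail_re = re.compile(r'(?P<timestamp>\S+).*image pull failed')
pull_result_re = re.compile(r'(?P<timestamp>\S+).*image pull finished with result (?P<result>\S+)')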
def main_method(args):
    sim_output = Path(args.sim_output)
    if not sim_output.exists():
        get_logger().error("%s does not exist", sim_output)
        return 1

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    get_logger().info("Simulation started at %s", first_timestamp)

    ip_info = gather_network_ips(sim_output)
    get_logger().debug("ip_info: %s", ip_info)

    first_timestamp_secs = first_timestamp.timestamp()

    output = Path(args.output)
    output.mkdir(parents=True, exist_ok=True)

    with multiprocessing.Pool(processes=os.cpu_count()) as pool:
        results = list()
        for node_dir in sim_output.iterdir():
            if node_dir.is_dir():
                results.append(pool.apply_async(func=process_node_dir, args=[args.unfiltered, output, node_dir, first_timestamp_secs, ip_info]))
        for result in results:
            result.wait()
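# Note (not from the source): AsyncResult.wait() only blocks until the worker
# finishes; it discards the return value and silently swallows any exception
# raised inside process_node_dir. Calling result.get() instead would propagate
# worker exceptions to the parent. The same pattern recurs in the other
# multiprocessing entry points below.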
def main_method(args):
    sim_output = Path(args.sim_output)
    if not sim_output.exists():
        get_logger().error("%s does not exist", sim_output)
        return 1

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    get_logger().info("Simulation started at %s", first_timestamp)

    first_timestamp_ms = first_timestamp.timestamp() * 1000

    output = Path(args.output)
    output.mkdir(parents=True, exist_ok=True)

    # client -> service -> timestamp -> ServiceCounts
    (client_data_sums, client_data_counts, server_data_sums, server_data_counts) = gather_data(sim_output)

    # client graphs
    client_data_sums_relative = relative_timestamps_client(client_data_sums, first_timestamp_ms)
    output_per_node_graphs(output, client_data_sums_relative, 'Client')

    # client service graphs
    client_service_data_sums = compute_per_service(client_data_counts)
    client_service_data_sums_relative = relative_timestamps_service(client_service_data_sums, first_timestamp_ms)
    output_node_graph(output, None, client_service_data_sums_relative, 'Client')

    # server graphs
    server_data_sums_relative = relative_timestamps_client(server_data_sums, first_timestamp_ms)
    output_per_node_graphs(output, server_data_sums_relative, 'Server')

    # server service graphs
    server_service_data_sums = compute_per_service(server_data_counts)
    server_service_data_sums_relative = relative_timestamps_service(server_service_data_sums, first_timestamp_ms)
    output_node_graph(output, None, server_service_data_sums_relative, 'Server')
def main_method(args):
    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    get_logger().info("Simulation started at %s", first_timestamp)
    first_timestamp_ms = first_timestamp.timestamp() * 1000

    window_size = int(args.window_size)
    get_logger().debug("Window size: %d", window_size)

    charts_dir = Path(args.chart_output)
    sim_output = Path(args.sim_output)
    output = Path(args.output)
    output.mkdir(parents=True, exist_ok=True)

    host_ip = pd.read_csv(sim_output / 'inputs/scenario/host-ip.csv')
    host_ip['host lower'] = host_ip['host'].str.lower()

    subnet_to_region = generate_hopcount_graphs.load_subnet_information(sim_output)

    frames = list()
    latency_dns = charts_dir / 'latency_dns'
    for filename in latency_dns.glob('client_processing_latency-*.csv'):
        get_logger().debug("Reading %s", filename)
        match = re.match(r'client_processing_latency-(.*)\.csv', filename.name)
        if not match:
            # str() is required here: concatenating str and Path raises TypeError
            raise RuntimeError("Could not find service name in " + str(filename))
        service = match.group(1)
        df = pd.read_csv(filename)
        if len(df.index) < 1:
            get_logger().warning("No data found in %s", filename)
        else:
            df['service'] = service
            frames.append(df)

    if len(frames) < 1:
        get_logger().error("No data to process")
        return 0

    df = pd.concat(frames)
    df = process_data(first_timestamp_ms, host_ip, subnet_to_region, window_size, df)

    all_statuses = df['message'].unique()
    for client_region in df['client region'].unique():
        client_data = df[df['client region'] == client_region]
        services = df['service'].unique()
        for service in services:
            service_data = client_data[client_data['service'] == service]
            graph_request_result(output, all_statuses, service_data, client_region, service)

    return 0
def main_method(args):
    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    first_timestamp_ms = first_timestamp.timestamp() * 1000
    get_logger().info("Simulation started at %s -> %d", first_timestamp, first_timestamp_ms)

    output = Path(args.output) / 'inferred-demand'
    if not output.exists():
        get_logger().error("%s does not exist and is required to read the CSV files", output)
        return 1

    for csv_file in output.glob('*.csv'):
        process_file(first_timestamp_ms, output, csv_file)
def main_method(args):
    chart_output = Path(args.chart_output)
    if not chart_output.exists():
        get_logger().error("%s does not exist", chart_output)
        return 1

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    min_time = first_timestamp.timestamp() * 1000

    output_dir = Path(args.output)
    output_dir.mkdir(parents=True, exist_ok=True)

    network_dir = chart_output / 'network'
    node_data = load_node_data(network_dir, min_time)
    if node_data is None:
        get_logger().error("No node data, cannot continue")
        return 1

    container_data = load_container_data(network_dir, min_time)

    with multiprocessing.Pool(processes=os.cpu_count()) as pool:
        results = list()
        for node in node_data['node'].unique():
            results.append(pool.apply_async(func=process_node, args=[output_dir, node_data, node]))
            results.append(pool.apply_async(func=process_node_per_service, args=[output_dir, node_data, node]))
        for container in container_data['container'].unique():
            results.append(pool.apply_async(func=process_container, args=[output_dir, container_data, container]))
            results.append(pool.apply_async(func=process_container_per_service, args=[output_dir, container_data, container]))
        for result in results:
            result.wait()
def main_method(args):
    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    get_logger().info("Simulation started at %s", first_timestamp)

    all_requests_path = Path(args.input)
    if not all_requests_path.exists():
        get_logger().error("Cannot read all requests file at %s", all_requests_path)
        return -1

    output = Path(args.output)
    output.mkdir(parents=True, exist_ok=True)

    sim_output = Path(args.sim_output)
    regions = dict()
    parse_region_subnets(sim_output, regions)

    all_requests = dict()  # service -> list of ClientRequest
    with open(all_requests_path) as f:
        reader = csv.DictReader(map_utils.skip_null_lines(f))
        for row in reader:
            ts_str = row['time_sent']
            service = row['service']
            timestamp = datetime.datetime.fromtimestamp(int(ts_str) / 1000, tz=pytz.UTC)
            dest_ip = row['server']
            dest_region = summarize_hifi.region_for_ip(regions, dest_ip)
            service_requests = all_requests.get(service, list())
            req = ClientRequest(timestamp, dest_region)
            service_requests.append(req)
            all_requests[service] = service_requests

    window_duration = datetime.timedelta(minutes=args.window_duration)
    for service, service_requests in all_requests.items():
        process_service(output, first_timestamp, regions, window_duration, service, service_requests)
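# Hedged sketch: ClientRequest is assumed to be a simple two-field record
# matching the constructor call above; the real definition in the repo may
# differ.
import collections
ClientRequest = collections.namedtuple('ClientRequest', ['timestamp', 'dest_region'])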
def main_method(args):
    sim_output = Path(args.sim_output)
    if not sim_output.exists():
        get_logger().error("%s does not exist", sim_output)
        return 1

    scenario_dir = Path(args.scenario)
    if not scenario_dir.exists():
        get_logger().error("%s does not exist", scenario_dir)
        return 1

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    first_timestamp_ms = first_timestamp.timestamp() * 1000
    get_logger().info("Simulation started at %s -> %d", first_timestamp, first_timestamp_ms)

    output = Path(args.output)
    output.mkdir(parents=True, exist_ok=True)

    show_load_demand_attr = args.show_load_demand_attr if args.show_load_demand_attr != '' else None

    all_services = map_utils.gather_all_services(scenario_dir)

    with multiprocessing.Pool(processes=os.cpu_count()) as pool:
        results = list()
        for node_dir in sim_output.iterdir():
            if not node_dir.is_dir():
                continue
            node_name_dir = map_utils.find_ncp_folder(node_dir)
            if node_name_dir is None:
                get_logger().debug("No NCP folder found in %s", node_dir)
                continue
            get_logger().debug(f"node_dir: {node_dir}")
            results.append(pool.apply_async(func=process_node, args=[output, first_timestamp_ms, all_services, node_name_dir, show_load_demand_attr]))
        for result in results:
            result.wait()
def process_client_dir(client_dir):
    unknown_host_re = re.compile(r'(?P<timestamp>\d{4}-\d{2}-\d{2}/\d{2}:\d{2}:\d{2}\.\d{3}/\S+).*Unable to find host (?P<host>\S+) after (?P<duration>\d+) ms')
    acceptable_delta = datetime.timedelta(seconds=1)
    prev_long_failure = dict()  # name -> timestamp

    # read the client logs in modification-time order so that rotated log
    # files are processed chronologically
    logfiles = sorted(client_dir.glob("map-client*.log"), key=lambda f: f.stat().st_mtime)
    with fileinput.input(files=logfiles) as f:
        for line in f:
            match = unknown_host_re.match(line)
            if match:
                timestamp = map_utils.log_timestamp_to_datetime(match.group('timestamp'))
                hostname = match.group('host')
                duration = int(match.group('duration'))
                if duration > 0:
                    prev_long_failure[hostname] = timestamp
                elif duration == 0:
                    if hostname not in prev_long_failure:
                        get_logger().warning("Lookup at %s of %s took zero time and there is no previous long lookup", timestamp, hostname)
                    else:
                        prev_timestamp = prev_long_failure[hostname]
                        diff = timestamp - prev_timestamp
                        if diff > acceptable_delta:
                            get_logger().warning("Lookup at %s of %s took zero time and the last lookup was at %s", timestamp, hostname, prev_timestamp)
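# Note (interpretation, not from the source): a 0 ms "Unable to find host"
# result is only expected when the resolver plausibly holds a cached negative
# answer, i.e. when a long failed lookup for the same host happened within the
# previous second. Both warnings above flag zero-time failures that lack such
# an explanation.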
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--logconfig", dest="logconfig", help="logging configuration (default: logging.json)", default='logging.json')
    parser.add_argument("-c", "--chart_output", dest="chart_output", help="Chart output directory (Required)", required=True)
    parser.add_argument("-o", "--output", dest="output", help="Output directory (Required)", required=True)
    parser.add_argument("--first-timestamp-file", dest="first_timestamp_file", help="Path to file containing the log timestamp that the simulation started", required=True)
    parser.add_argument("--sample-duration", dest="sample_duration", help="Number of minutes between samples of plans for expected percentages plots", default=1, type=float)
    parser.add_argument("--scenario", dest="scenario", help="Scenario directory (Required)", required=True)

    args = parser.parse_args(argv)
    map_utils.setup_logging(default_path=args.logconfig)

    chart_output = Path(args.chart_output)
    if not chart_output.exists():
        get_logger().error("%s does not exist", chart_output)
        return 1

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    get_logger().info("Simulation started at %s", first_timestamp)

    output_dir = Path(args.output)
    output_dir.mkdir(parents=True, exist_ok=True)

    dcop_file = chart_output / 'dcop_plan_updates/all_dcop_plans.csv'
    dcop_plans = pd.read_csv(dcop_file, na_values="?")
    dcop_plans.fillna(0, inplace=True)
    dcop_plans.sort_values(by=['time'], inplace=True)

    rlg_file = chart_output / 'rlg_plan_updates/all_rlg_overflow_plans.csv'
    rlg_plans = pd.read_csv(rlg_file, na_values="?")
    rlg_plans.fillna(0, inplace=True)
    rlg_plans.sort_values(by=['time'], inplace=True)

    dns_file = chart_output / 'dns_region_plan_updates/all_dns_region_plans.csv'
    dns_plans = pd.read_csv(dns_file, na_values="?")
    dns_plans.fillna(0, inplace=True)
    dns_plans.sort_values(by=['time'], inplace=True)

    first_timestamp_ms = first_timestamp.timestamp() * 1000
    dcop_plans['time_minutes'] = (dcop_plans['time'] - first_timestamp_ms) / 1000 / 60
    rlg_plans['time_minutes'] = (rlg_plans['time'] - first_timestamp_ms) / 1000 / 60
    dns_plans['time_minutes'] = (dns_plans['time'] - first_timestamp_ms) / 1000 / 60

    all_regions = np.union1d(dcop_plans['plan_region'].unique(), rlg_plans['plan_region'].unique())
    all_services = np.union1d(dcop_plans['service'].unique(), rlg_plans['service'].unique())

    for region in all_regions:
        for service in all_services:
            plot_region_service_plan_type(output_dir, "DCOP", dcop_plans, all_regions, region, service)
            plot_region_service_plan_type(output_dir, "RLG", rlg_plans, all_regions, region, service)
            plot_region_service_plan_type(output_dir, "DNS", dns_plans, all_regions, region, service)

    scenario_dir = Path(args.scenario)
    if not scenario_dir.exists():
        get_logger().error("%s does not exist", scenario_dir)
        return 1

    client_regions = compute_client_regions(scenario_dir)

    # plot expected percentages
    sample_duration = args.sample_duration
    for source_region in client_regions:
        for service in all_services:
            times, region_weights = compute_expected(sample_duration, all_regions, 'DCOP', dcop_plans, source_region, service)
            plot_expected(output_dir, 'DCOP', source_region, service, times, region_weights)

            times, region_weights = compute_expected(sample_duration, all_regions, 'RLG', rlg_plans, source_region, service)
            plot_expected(output_dir, 'RLG', source_region, service, times, region_weights)

            times, region_weights = compute_expected(sample_duration, all_regions, 'DNS', dns_plans, source_region, service)
            plot_expected(output_dir, 'DNS', source_region, service, times, region_weights)

    get_logger().info("Finished")
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--logconfig", dest="logconfig", help="logging configuration (default: logging.json)", default='logging.json')
    parser.add_argument("-s", "--sim-output", dest="sim_output", help="Sim output directory (Required)", required=True)
    parser.add_argument("-o", "--output", dest="output", help="Output directory (Required)", required=True)
    parser.add_argument("--first-timestamp-file", dest="first_timestamp_file", help="Path to file containing the log timestamp that the simulation started", required=True)

    args = parser.parse_args(argv)
    map_utils.setup_logging(default_path=args.logconfig)

    sim_output = Path(args.sim_output)
    if not sim_output.exists():
        get_logger().error("%s does not exist", sim_output)
        return 1

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    get_logger().info("Simulation started at %s", first_timestamp)
    first_timestamp_ms = first_timestamp.timestamp() * 1000

    output = Path(args.output)
    output.mkdir(parents=True, exist_ok=True)

    # app -> node_attr -> container_name -> timestamp -> value
    data_load = dict()
    data_demand = dict()

    for node_dir in sim_output.iterdir():
        if not node_dir.is_dir():
            continue
        get_logger().debug("Processing node %s", node_dir)
        agent_dir = node_dir / 'agent'
        if agent_dir.is_dir():
            get_logger().debug("\tAgent dir %s", agent_dir)
            for node_name_dir in agent_dir.iterdir():
                if not node_name_dir.is_dir():
                    continue
                for time_dir in node_name_dir.iterdir():
                    if not time_dir.is_dir():
                        continue
                    add_time_dir_data(time_dir, data_load, data_demand)
        else:
            for time_dir in node_dir.iterdir():
                if not time_dir.is_dir():
                    continue
                add_time_dir_data(time_dir, data_load, data_demand)

    output_graphs(first_timestamp_ms, output, data_load, "load")
    output_graphs(first_timestamp_ms, output, data_demand, "demand")
def main_method(args):
    sim_output = Path(args.sim_output)
    if not sim_output.exists():
        get_logger().error("%s does not exist", sim_output)
        return 1

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    first_timestamp_ms = first_timestamp.timestamp() * 1000
    get_logger().info("Simulation started at %s -> %d", first_timestamp, first_timestamp_ms)

    output = Path(args.output) / 'rlg-resource-report-lag'
    output.mkdir(parents=True, exist_ok=True)

    for node_dir in sim_output.iterdir():
        max_diff = None
        if not node_dir.is_dir():
            continue
        get_logger().debug("Processing node %s", node_dir)

        node_name_dir = map_utils.find_ncp_folder(node_dir)
        if node_name_dir is None:
            get_logger().debug("No NCP folder found")
            continue
        ncp = map_utils.node_name_from_dir(node_name_dir)

        all_known_nodes = set()
        xs = list()
        ys = dict()  # report node -> series of diffs
        for time_dir in sorted(node_name_dir.iterdir()):
            if not time_dir.is_dir():
                continue
            directory_time = int(time_dir.stem)
            get_logger().debug("\t\tProcessing time %s", time_dir)

            resource_report_file = time_dir / 'regionResourceReports-SHORT.json'
            if resource_report_file.exists():
                try:
                    with open(resource_report_file, 'r') as f:
                        resource_reports = json.load(f)
                except json.decoder.JSONDecodeError:
                    get_logger().warning("Problem reading %s, skipping", resource_report_file)
                    continue

                xs.append(directory_time)
                seen_nodes = set()
                for resource_report in resource_reports:
                    report_time = int(resource_report['timestamp'])
                    if report_time < 1:
                        # skip reports that we don't have a rational time for
                        diff = None
                    else:
                        diff = map_utils.timestamp_to_seconds(directory_time - report_time)

                    node = resource_report['nodeName']['name']
                    if node in seen_nodes:
                        get_logger().warning("Saw multiple reports from %s in %s, skipping the second one", node, time_dir)
                        continue
                    seen_nodes.add(node)
                    all_known_nodes.add(node)

                    # the default value ensures that newly discovered nodes
                    # have a list of values the same length as the other lists
                    node_series = ys.get(node, [None] * (len(xs) - 1))
                    node_series.append(diff)
                    ys[node] = node_series
                    get_logger().debug("Added %s to %s xs: %d node_series: %d", diff, node, len(xs), len(node_series))

                    # diff is None when the report had no usable timestamp;
                    # guard against comparing None with a number
                    if diff is not None and (max_diff is None or diff > max_diff):
                        max_diff = diff

                # append placeholders for any nodes that we should have seen
                # but that did not report in this time directory
                missing_nodes = all_known_nodes - seen_nodes
                if len(missing_nodes) > 0:
                    get_logger().debug("Missing nodes: %s Seen: %s", missing_nodes, seen_nodes)
                    for missing_node in missing_nodes:
                        # the key must exist by now
                        node_series = ys[missing_node]
                        node_series.append(None)
                        ys[missing_node] = node_series
                        get_logger().debug("Added None to %s", missing_node)

        for node, node_series in ys.items():
            if len(xs) != len(node_series):
                raise RuntimeError(f"List sizes not correct for {node} {len(xs)} != {len(node_series)}")

        if len(xs) > 0:
            get_logger().info("Maximum diff for %s is %s seconds", ncp, max_diff)
            output_graph(output, ncp, xs, ys, first_timestamp_ms)
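# Worked illustration (not from the source) of the padding invariant above:
# every node's series in ys must stay the same length as xs. A node first
# seen at the third sample gets two leading Nones, and a known node missing
# from a sample gets None appended for that sample:
#   xs          = [t0,   t1,   t2  ]
#   ys['nodeA'] = [0.5,  0.7,  None]   # missed the t2 report
#   ys['nodeB'] = [None, None, 0.2 ]   # first seen at t2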
def main_method(args):
    sim_output = Path(args.sim_output)
    if not sim_output.exists():
        get_logger().error("%s does not exist", sim_output)
        return 1

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    earliest_time = first_timestamp.timestamp() * 1000

    output_dir = Path(args.output)
    output_dir.mkdir(parents=True, exist_ok=True)

    # read all of the data in
    client_frames = list()
    server_frames = list()
    with multiprocessing.Pool(processes=os.cpu_count()) as pool:
        client_results = list()
        server_results = list()
        for node_dir in sim_output.iterdir():
            if not node_dir.is_dir():
                continue

            client_metrics_dir = node_dir / 'client/container_data'
            if client_metrics_dir.exists() and client_metrics_dir.is_dir():
                result = pool.apply_async(func=process_client, args=[node_dir.name, client_metrics_dir])
                client_results.append(result)

            server_container_data = node_dir / 'agent/container_data'
            if server_container_data.exists() and server_container_data.is_dir():
                result = pool.apply_async(func=process_server, args=[node_dir.name, server_container_data])
                server_results.append(result)

        for result in client_results:
            frames = result.get()
            client_frames.extend(frames)
        for result in server_results:
            (s_frames, c_frames) = result.get()
            server_frames.extend(s_frames)
            client_frames.extend(c_frames)

    if len(client_frames) > 0:
        client_data = pd.concat(client_frames, ignore_index=True)
        client_data['time_minutes'] = (client_data['time_ack_received'] - earliest_time) / 1000 / 60
        client_data['latency_seconds'] = client_data['latency'] / 1000
        all_clients = client_data['client'].unique()
        get_logger().debug("Client services {}".format(client_data['service'].unique()))
        with open(output_dir / 'all_client_requests.csv', 'w') as f:
            client_data.to_csv(f, index=False)
    else:
        get_logger().warning("No client data")

    if len(server_frames) > 0:
        server_data = pd.concat(server_frames, ignore_index=True)
        server_data['time_minutes'] = (server_data['timestamp'] - earliest_time) / 1000 / 60
        server_data['latency_seconds'] = server_data['latency'] / 1000
        get_logger().debug(server_data.head())
        get_logger().debug("Server services {}".format(server_data['service'].unique()))
    else:
        get_logger().warning("No server data")

    if len(server_frames) < 1 or len(client_frames) < 1:
        get_logger().error("Need both client and server data to write out remaining files, skipping remaining outputs")
        return 0

    all_services = np.unique(np.concatenate([client_data['service'].unique(), server_data['service'].unique()], axis=0))
    get_logger().debug("all services {}".format(all_services))

    write_client_data(output_dir, all_clients, all_services, client_data)
    write_per_client(output_dir, all_services, all_clients, client_data)
    write_server_data(output_dir, all_services, server_data)
    write_combined(output_dir, all_services, server_data, client_data)
    write_combined_per_service(output_dir, all_services, server_data, client_data)
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--logconfig", dest="logconfig", help="logging configuration (default: logging.json)", default='logging.json')
    parser.add_argument("--first-timestamp-file", dest="first_timestamp_file", help="Path to file containing the log timestamp that the simulation started", required=True)
    parser.add_argument("-s", "--sim-output", dest="sim_output", help="Sim output directory (Required)", required=True)
    parser.add_argument("-o", "--output", dest="output", help="Output directory (Required)", required=True)
    parser.add_argument("-c", "--container-capacity", dest="container_queue_length_capacity", help="QueueLength capacity per container", default=None)
    parser.add_argument("--process-latency-data", dest="process_latency_data", action='store_true', help="If present, show queue length estimate from processing_latency.csv files.")

    args = parser.parse_args(argv)
    map_utils.setup_logging(default_path=args.logconfig)

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    first_timestamp_ms = first_timestamp.timestamp() * 1000
    get_logger().info("Run started at %s", first_timestamp)

    sim_output = Path(args.sim_output)
    if not sim_output.exists():
        get_logger().error("%s does not exist", sim_output)
        return 1

    output = Path(args.output)
    output.mkdir(parents=True, exist_ok=True)

    container_queue_length_capacity = int(args.container_queue_length_capacity) if args.container_queue_length_capacity else None
    process_latency_data = args.process_latency_data

    # app -> node_attr -> container_name -> timestamp -> value
    data_load = dict()
    data_demand = dict()
    container_processing_latency = dict()

    for node_dir in sim_output.iterdir():
        if not node_dir.is_dir():
            continue
        get_logger().debug("Processing node %s", node_dir)
        agent_dir = node_dir / 'agent'
        if not agent_dir.is_dir():
            continue
        get_logger().debug("\tAgent dir %s", agent_dir)

        for node_name_dir in agent_dir.iterdir():
            if not node_name_dir.is_dir():
                continue
            for time_dir in node_name_dir.iterdir():
                if not time_dir.is_dir():
                    continue
                get_logger().debug("\t\tProcessing time %s", time_dir)
                resource_report_file = time_dir / 'resourceReport-SHORT.json'
                if resource_report_file.exists():
                    try:
                        with open(resource_report_file, 'r') as f:
                            resource_report = json.load(f)
                        time = int(time_dir.stem)
                        if 'containerReports' in resource_report:
                            for container_name, container_report in resource_report['containerReports'].items():
                                container_name_short = container_name.split('.')[0]
                                process_container_report(data_load, data_demand, container_name_short, time, container_report)
                    except json.decoder.JSONDecodeError:
                        get_logger().warning("Problem reading %s, skipping", resource_report_file)

        container_data_dir = agent_dir / 'container_data'
        if process_latency_data:
            for container_data_service_dir in container_data_dir.iterdir():
                match = re.search(r'^(.+)\.([^\._]+)_(.+)$', container_data_service_dir.name)
                if match:
                    artifact = match.group(2)
                    for container_data_service_name_dir in container_data_service_dir.iterdir():
                        match = re.search(r'^([^\.]+)\.map\.dcomp$', container_data_service_name_dir.name)
                        if match:
                            container_name = match.group(1)
                            for container_data_service_name_instance_dir in container_data_service_name_dir.iterdir():
                                processing_latency_file = container_data_service_name_instance_dir / 'app_metrics_data' / 'processing_latency.csv'
                                container_instance_latency_data = dict()
                                with open(processing_latency_file, "r") as f:
                                    reader = csv.DictReader(f)
                                    for row in reader:
                                        event = row['event']
                                        time_start = int(row['time_received'])
                                        time_end = int(row['time_ack_sent'])
                                        if event == 'request_success':
                                            # +1 when a request enters the container, -1 when it is acknowledged
                                            container_instance_latency_data[time_start] = 1
                                            container_instance_latency_data[time_end] = -1
                                container_processing_latency.setdefault(artifact, dict()).setdefault(container_name, list()).append(container_instance_latency_data)

    container_expected_queue_lengths = accumulate_expected_queue_length(container_processing_latency)

    output_graphs(first_timestamp_ms, output, data_load, container_expected_queue_lengths, "load", container_queue_length_capacity)
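# Hedged sketch of the accumulation step assumed above; the repo's
# accumulate_expected_queue_length may differ. Each container instance
# contributes +1 at time_received and -1 at time_ack_sent, so sorting the
# timestamps and taking a running sum gives the expected number of in-flight
# requests over time.
def sketch_expected_queue_length(instance_deltas):
    # instance_deltas: timestamp -> +1/-1, as built in the loop above
    queue_length = 0
    series = list()
    for ts in sorted(instance_deltas):
        queue_length += instance_deltas[ts]
        series.append((ts, queue_length))
    return series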
def main_method(args):
    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    get_logger().info("Simulation started at %s", first_timestamp)

    sim_output = Path(args.sim_output)
    if not sim_output.exists():
        get_logger().error("%s does not exist", sim_output)
        return 1

    scenario_dir = Path(args.scenario)
    if not scenario_dir.exists():
        get_logger().error("%s does not exist", scenario_dir)
        return 1

    output = Path(args.output)
    output.mkdir(parents=True, exist_ok=True)

    all_apps = map_utils.gather_all_services(scenario_dir)

    # container_name -> ncp_name
    container_node = dict()
    # node_attr -> app -> ncp_name -> timestamp -> value
    data_load = dict()
    data_demand_short = dict()
    data_demand_long = dict()
    # node_attr -> app -> container_name -> timestamp -> value
    data_capacity = dict()

    for node_dir in sim_output.iterdir():
        if not node_dir.is_dir():
            continue
        process_node(first_timestamp, all_apps, data_load, data_demand_short, data_demand_long, data_capacity, node_dir, container_node)

    ## process raw data
    get_logger().info("Processing raw data")

    # ncp_name -> region_name
    node_region = map_utils.gather_region_info(scenario_dir)
    get_logger().debug("node_region: %s", node_region)

    get_logger().info("Summing values by region")
    data_load_per_region = sum_values_by_region(data_load, node_region)
    data_demand_short_per_region = sum_values_by_region(data_demand_short, node_region)
    data_demand_long_per_region = sum_values_by_region(data_demand_long, node_region)
    capacity_per_region = sum_capacity_values_by_region(data_capacity, node_region, container_node)
    del container_node
    del node_region

    get_logger().info("Summing load")
    #get_logger().debug("Data load: %s", data_load)
    # node_attr -> app -> timestamp -> value
    data_load_summed = sum_values(data_load)
    #get_logger().debug("Data load summed: %s", data_load_summed)
    del data_load

    get_logger().info("Summing demand")
    # node_attr -> app -> timestamp -> value
    data_demand_short_summed = sum_values(data_demand_short)
    data_demand_long_summed = sum_values(data_demand_long)
    del data_demand_short
    del data_demand_long

    #get_logger().debug("Data capacity: %s", data_capacity)
    # node_attr -> app -> timestamp -> value
    data_capacity_summed = sum_values(data_capacity)
    #get_logger().debug("Data capacity summed: %s", data_capacity_summed)
    del data_capacity

    get_logger().info("Starting graph creation")
    output_graphs_per_region(first_timestamp, output, capacity_per_region, data_load_per_region, 'load')
    output_graphs_per_region(first_timestamp, output, capacity_per_region, data_demand_short_per_region, 'demand-short')
    output_graphs_per_region(first_timestamp, output, capacity_per_region, data_demand_long_per_region, 'demand-long')
    output_capacity_graph_per_region(first_timestamp, output, capacity_per_region)

    for app in all_apps:
        output_region_graphs_for_app(first_timestamp, output, capacity_per_region, data_load_per_region, 'load', app)
        output_region_graphs_for_app(first_timestamp, output, capacity_per_region, data_demand_short_per_region, 'demand-short', app)
        output_region_graphs_for_app(first_timestamp, output, capacity_per_region, data_demand_long_per_region, 'demand-long', app)
        output_region_capacity_graph_for_app(first_timestamp, output, capacity_per_region, app)

    output_graphs(first_timestamp, output, data_capacity_summed, data_load_summed, "load")
    output_graphs(first_timestamp, output, data_capacity_summed, data_demand_short_summed, "demand-short")
    output_graphs(first_timestamp, output, data_capacity_summed, data_demand_long_summed, "demand-long")
    output_capacity_graph(first_timestamp, output, data_capacity_summed)

    get_logger().info("Finished")
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

    class ArgumentParserWithDefaults(argparse.ArgumentParser):
        '''
        From https://stackoverflow.com/questions/12151306/argparse-way-to-include-default-values-in-help
        '''
        def add_argument(self, *args, help=None, default=None, **kwargs):
            if help is not None:
                kwargs['help'] = help
            if default is not None and args[0] != '-h':
                kwargs['default'] = default
                if help is not None:
                    kwargs['help'] += ' (default: {})'.format(default)
            super().add_argument(*args, **kwargs)

    parser = ArgumentParserWithDefaults(formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("-l", "--logconfig", dest="logconfig", help="logging configuration (default: logging.json)", default='logging.json')
    parser.add_argument("--debug", dest="debug", help="Enable interactive debugger on error", action='store_true')
    parser.add_argument("--input", dest="input", help="Path to all_client_requests.csv", required=True)
    parser.add_argument("--first-timestamp-file", dest="first_timestamp_file", help="Path to file containing the log timestamp that the simulation started", required=True)
    parser.add_argument("-o", "--output", dest="output", help="Output directory", required=True)
    parser.add_argument("-s", "--sim-output", dest="sim_output", help="Sim output directory (Required)", required=True)
    parser.add_argument("--window-duration", dest="window_duration", help="Number of minutes to use for the window to compute percentages over", default=1, type=float)

    args = parser.parse_args(argv)
    map_utils.setup_logging(default_path=args.logconfig)

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    get_logger().info("Simulation started at %s", first_timestamp)

    all_requests_path = Path(args.input)
    if not all_requests_path.exists():
        get_logger().error("Cannot read all requests file at %s", all_requests_path)
        return -1

    output = Path(args.output)
    output.mkdir(parents=True, exist_ok=True)

    sim_output = Path(args.sim_output)
    window_duration = datetime.timedelta(minutes=args.window_duration)

    scenario_dir = sim_output / 'inputs' / 'scenario'
    node_to_region_dict = map_utils.gather_region_info(scenario_dir)
    _, ip_to_node_dict = summarize_hifi.parse_node_info(sim_output)
    get_logger().debug(f"node_to_region_dict = {node_to_region_dict}")
    get_logger().debug(f"ip_to_node_dict = {ip_to_node_dict}")

    # region --> service --> window --> ClientDnsRequestCounts
    requests_region_service_counts = dict()

    read_all_requests_file(all_requests_path, first_timestamp, window_duration, node_to_region_dict, requests_region_service_counts)
    read_weighted_dns_files(sim_output, first_timestamp, window_duration, ip_to_node_dict, requests_region_service_counts)

    output_requests_csv_per_region(requests_region_service_counts, first_timestamp, output)
    output_requests_graph_per_region(requests_region_service_counts, first_timestamp, output)
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--logconfig", dest="logconfig", help="logging configuration (default: logging.json)", default='logging.json')
    parser.add_argument("-L", "--log-file", dest="log_file_path", help="Log file with timestamps", required=True)
    parser.add_argument("-b", "--testbed", dest="testbed", help="The testbed used for the run. This is used to obtain time zone information.", default='')
    parser.add_argument("-s", "--run-seconds-start", dest="run_seconds_start", help="Number of seconds into run to start inspection", default='0')
    parser.add_argument("-i", "--interval", dest="run_seconds_interval", help="Interval number of seconds within run to inspect", default='')
    parser.add_argument("-d", "--identify-log-delays", dest="log_warn_delay", help="Print a warning between consecutive log statements separated by a delay larger than the value specified in seconds.", default='')
    parser.add_argument("-f", "--regex-filter", dest="regex_filter", help="Pattern to use for filtering of log statements", default='')
    parser.add_argument("-t", "--show-run-seconds", dest="show_run_seconds", action="store_true", help="Adds the number of seconds into the run to each log statement", default='')
    parser.add_argument("-o", "--output-file", dest="log_output_file_path", help="Log statements output from inspection interval", default='')
    parser.add_argument("-p", "--occurrence-plot", dest="occurrence_plot_output_path", help="Create a graph showing occurrences of log messages in time", default='')
    parser.add_argument("--first-timestamp-file", dest="first_timestamp_file", help="Path to file containing the log timestamp that the simulation started", required=True)

    args = parser.parse_args(argv)
    map_utils.setup_logging(default_path=args.logconfig)

    # parse arguments
    log_file_path = args.log_file_path
    testbed = None if args.testbed == '' else args.testbed
    run_seconds_start = float(args.run_seconds_start)
    run_seconds_interval = None if args.run_seconds_interval == '' else float(args.run_seconds_interval)
    log_warn_delay = None if args.log_warn_delay == '' else timedelta(seconds=float(args.log_warn_delay))
    regex_filter = None if args.regex_filter == '' else re.compile(args.regex_filter)
    show_run_seconds = args.show_run_seconds
    log_output_file_path = args.log_output_file_path
    occurrence_plot_output_path = None if args.occurrence_plot_output_path == '' else args.occurrence_plot_output_path

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    get_logger().info("Simulation started at %s", first_timestamp)

    time_zone = None
    #testbed_to_time_zone_map = {'emulab' : tz.tzoffset('MDT', -21600), 'dcomp' : tz.tzoffset('UTC', 0)}
    #testbed_to_time_zone_map = {'emulab' : timezone(-timedelta(hours=6), 'mdt'), 'dcomp' : timezone.utc}
    testbed_to_time_zone_map = {'emulab': timezone('US/Mountain'), 'dcomp': timezone('UTC')}
    if testbed is not None:
        # look up with the lower-cased testbed name so the match does not
        # depend on the capitalization of the command line argument
        if testbed.lower() in testbed_to_time_zone_map:
            time_zone = testbed_to_time_zone_map[testbed.lower()]
            get_logger().info("Found time zone for testbed '%s': %s.", testbed, time_zone)
        else:
            get_logger().fatal("Could not map testbed '%s' to timezone.", testbed)
            exit(1)

    get_logger().info("Log File: %s", log_file_path)
    with open(log_file_path, "r") as log_file:
        log_line_occurrence_times = list()

        ref_time = first_timestamp
        get_logger().info("  Ref time String: %s", map_utils.datetime_to_string(ref_time))
        get_logger().info("  Ref time epoch ms: %s", str(datetime_to_epoch_ms(ref_time)))
        get_logger().info("")
        get_logger().info("")

        start_time_within_run = ref_time + timedelta(seconds=run_seconds_start)
        get_logger().info("At %s seconds into run: ", str(run_seconds_start))
        get_logger().info("  Time within run: %s", map_utils.datetime_to_string(start_time_within_run))
        get_logger().info("  Epoch ms: %s", str(datetime_to_epoch_ms(start_time_within_run)))
        get_logger().info("")

        end_time_within_run = None if run_seconds_interval is None else (ref_time + timedelta(seconds=(run_seconds_start + run_seconds_interval)))
        if end_time_within_run is not None:
            get_logger().info("At %s seconds into run: ", str(run_seconds_start + run_seconds_interval))
            get_logger().info("  Time within run: %s", map_utils.datetime_to_string(end_time_within_run))
            get_logger().info("  Epoch ms: %s", str(datetime_to_epoch_ms(end_time_within_run)))

        get_logger().info("\n\n\n")
        get_logger().info("<==================== Log Output ====================>\n")

        if log_output_file_path != '':
            log_output_file = open(log_output_file_path, "w")
            log_output_file.write("Log statements for run time range \n")
            log_output_file.write("  Start Time (s): " + str(run_seconds_start) + "\n")
            if run_seconds_interval is not None:
                log_output_file.write("  End Time (s): " + str(run_seconds_start + run_seconds_interval) + "\n")
            log_output_file.write("\n")
            if log_warn_delay is not None:
                log_output_file.write("  Max time delay without warning (s): " + str(log_warn_delay) + "\n")
            log_output_file.write("====================================================================================================\n\n\n\n")
        else:
            log_output_file = None

        log_lines_printed = 0
        prev_line = ''
        prev_line_time = None
        last_line_matches = False

        for line in log_file:
            output_line = line
            line_time = map_utils.log_line_to_time(line, time_zone)
            if line_time is not None:
                if line_time >= start_time_within_run and (regex_filter is None or re.search(regex_filter, line)):
                    if show_run_seconds:
                        # total_seconds() rather than .seconds so that runs
                        # longer than a day are not truncated
                        line_run_seconds = int((line_time - ref_time).total_seconds())
                        output_line = str(line_run_seconds) + ": " + output_line

                    if log_warn_delay is not None and prev_line_time is not None:
                        log_delay = line_time - prev_line_time
                        if log_output_file is not None and log_delay > log_warn_delay:
                            log_output_file.write("<--- Time delay " + str(log_delay) + " > " + str(log_warn_delay) + " --->\n")

                    if log_lines_printed < 1:
                        get_logger().info(line)
                    if log_output_file is not None:
                        log_output_file.write(output_line)
                    record_output_line_time(ref_time, line_time, log_line_occurrence_times)

                    prev_line = line
                    prev_line_time = line_time
                    log_lines_printed += 1
                    last_line_matches = True
                else:
                    last_line_matches = False

                if end_time_within_run is not None and line_time > end_time_within_run:
                    break
            else:
                # write a statement without a valid timestamp if and only if the
                # most recent line with a valid time was printed; this allows
                # multiline log statements such as exceptions to be printed entirely
                if last_line_matches:
                    if log_output_file is not None:
                        log_output_file.write(output_line)

        get_logger().info("  ......")
        get_logger().info(prev_line)

        # close the output file if it was specified
        if log_output_file is not None:
            log_output_file.close()

        if occurrence_plot_output_path is not None:
            if regex_filter is None:
                title = "Times of all log lines"
            else:
                title = "Times of log lines matching regex '" + regex_filter.pattern + "'"
            plot_log_line_occurrence(occurrence_plot_output_path, title, log_line_occurrence_times)
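# Hedged sketch: datetime_to_epoch_ms is called above but not defined in this
# excerpt. Given the "epoch ms" log labels it is assumed to convert an aware
# datetime to integer milliseconds since the Unix epoch; the repo's actual
# definition may differ.
def datetime_to_epoch_ms(dt):
    return int(dt.timestamp() * 1000)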