Example 1
def process_logfile(writer, node, agent_logfile):
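    # Scan an agent log for image pull events; pull_re, pull_fail_re and
    # pull_result_re are assumed to be module-level regexes matching the
    # pull start, pull failure and pull result log lines respectively.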
    initial_pull_timestamp = None
    pull_attempts = 0
    current_image = None

    with open(agent_logfile, 'r') as f:
        for line in f:
            pull_match = pull_re.match(line)
            pull_fail_match = pull_fail_re.match(line)
            pull_result_match = pull_result_re.match(line)

            if pull_match is not None:
                initial_pull_timestamp = map_utils.log_timestamp_to_datetime(
                    pull_match.group('timestamp'))
                current_image = pull_match.group('image')
                pull_attempts = 1
            elif pull_fail_match is not None:
                pull_attempts += 1
            elif pull_result_match is not None:
                final_timestamp = map_utils.log_timestamp_to_datetime(
                    pull_result_match.group('timestamp'))
                duration = final_timestamp - initial_pull_timestamp
                writer.writerow([
                    node, initial_pull_timestamp, final_timestamp,
                    duration.total_seconds(), pull_attempts,
                    pull_result_match.group('result'), current_image
                ])

                initial_pull_timestamp = None
                pull_attempts = 0
                current_image = None
Example 2
def main_method(args):
    sim_output = Path(args.sim_output)
    if not sim_output.exists():
        get_logger().error("%s does not exist", sim_output)
        return 1

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    get_logger().info("Simulation started at %s", first_timestamp)

    ip_info = gather_network_ips(sim_output)
    get_logger().debug("ip_info: %s", ip_info)

    first_timestamp_secs = first_timestamp.timestamp()
    
    output = Path(args.output)
    output.mkdir(parents=True, exist_ok=True)

    with multiprocessing.Pool(processes=os.cpu_count()) as pool:
        results = list()

        for node_dir in sim_output.iterdir():
            if node_dir.is_dir():
                results.append(pool.apply_async(func=process_node_dir, args=[args.unfiltered, output, node_dir, first_timestamp_secs, ip_info]))

        for result in results:
            result.wait()
Example 3
def main_method(args):
    sim_output = Path(args.sim_output)
    if not sim_output.exists():
        get_logger().error("%s does not exist", sim_output)
        return 1

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    get_logger().info("Simulation started at %s", first_timestamp)
    first_timestamp_ms = first_timestamp.timestamp() * 1000

    output = Path(args.output)
    output.mkdir(parents=True, exist_ok=True)

    # client -> service -> timestamp -> ServiceCounts
    (client_data_sums, client_data_counts, server_data_sums, server_data_counts) = gather_data(sim_output)

    # client graphs
    client_data_sums_relative = relative_timestamps_client(client_data_sums, first_timestamp_ms)
    output_per_node_graphs(output, client_data_sums_relative, 'Client')

    # client service graphs
    client_service_data_sums = compute_per_service(client_data_counts)
    client_service_data_sums_relative = relative_timestamps_service(client_service_data_sums, first_timestamp_ms)
    output_node_graph(output, None, client_service_data_sums_relative, 'Client')

    # server graphs
    server_data_sums_relative = relative_timestamps_client(server_data_sums, first_timestamp_ms)
    output_per_node_graphs(output, server_data_sums_relative, 'Server')

    # server service graphs
    server_service_data_sums = compute_per_service(server_data_counts)
    server_service_data_sums_relative = relative_timestamps_service(server_service_data_sums, first_timestamp_ms)
    output_node_graph(output, None, server_service_data_sums_relative, 'Server')
Example 4
def main_method(args):
    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    get_logger().info("Simulation started at %s", first_timestamp)

    first_timestamp_ms = first_timestamp.timestamp() * 1000
    window_size = int(args.window_size)
    get_logger().debug("Window size: %d", window_size)
    charts_dir = Path(args.chart_output)
    sim_output = Path(args.sim_output)

    output = Path(args.output)
    output.mkdir(parents=True, exist_ok=True)

    host_ip = pd.read_csv(sim_output / 'inputs/scenario/host-ip.csv')
    host_ip['host lower'] = host_ip['host'].str.lower()

    subnet_to_region = generate_hopcount_graphs.load_subnet_information(sim_output)
    
    frames = list()
    latency_dns = charts_dir / 'latency_dns'
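    # one CSV per service is expected, named client_processing_latency-<service>.csv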
    for filename in latency_dns.glob('client_processing_latency-*.csv'):
        get_logger().debug("Reading %s", filename)
        
        match = re.match(r'client_processing_latency-(.*)\.csv', filename.name)
        if not match:
            raise RuntimeError(f"Could not find service name in {filename}")
        service = match.group(1)
        df = pd.read_csv(filename)
        if len(df.index) < 1:
            get_logger().warning("No data found in %s", filename)
        else:
            df['service'] = service
        frames.append(df)

    if len(frames) < 1:
        get_logger().error("No data to process")
        return 0
    df = pd.concat(frames)
    
    df = process_data(first_timestamp_ms, host_ip, subnet_to_region, window_size, df)

    all_statuses = df['message'].unique()
    
    for client_region in df['client region'].unique():
        client_data = df[df['client region'] == client_region]
        
        services = df['service'].unique()
        for service in services:
            service_data = client_data[client_data['service'] == service]
            graph_request_result(output, all_statuses, service_data, client_region, service)
            
    
    return 0
Example 5
def main_method(args):
    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    first_timestamp_ms = first_timestamp.timestamp() * 1000
    get_logger().info("Simulation started at %s -> %d", first_timestamp,
                      first_timestamp_ms)

    output = Path(args.output) / 'inferred-demand'
    if not output.exists():
        get_logger().error(
            "%s does not exist and is required to read the CSV files", output)
        return 1

    for csv_file in output.glob('*.csv'):
        process_file(first_timestamp_ms, output, csv_file)
Example 6
def main_method(args):
    chart_output = Path(args.chart_output)
    if not chart_output.exists():
        get_logger().error("%s does not exist", chart_output)
        return 1

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    min_time = first_timestamp.timestamp() * 1000

    output_dir = Path(args.output)
    output_dir.mkdir(parents=True, exist_ok=True)

    network_dir = chart_output / 'network'

    node_data = load_node_data(network_dir, min_time)
    if node_data is None:
        get_logger().error("No node data, cannot continue")
        return 1

    container_data = load_container_data(network_dir, min_time)

    with multiprocessing.Pool(processes=os.cpu_count()) as pool:
        results = list()

        for node in node_data['node'].unique():
            results.append(
                pool.apply_async(func=process_node,
                                 args=[output_dir, node_data, node]))
            results.append(
                pool.apply_async(func=process_node_per_service,
                                 args=[output_dir, node_data, node]))
        for container in container_data['container'].unique():
            results.append(
                pool.apply_async(func=process_container,
                                 args=[output_dir, container_data, container]))
            results.append(
                pool.apply_async(func=process_container_per_service,
                                 args=[output_dir, container_data, container]))

        for result in results:
            result.wait()
Example 7
def main_method(args):
    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)

    get_logger().info("Simulation started at %s", first_timestamp)

    all_requests_path = Path(args.input)
    if not all_requests_path.exists():
        get_logger().error("Cannot read all requests file at %s",
                           all_requests_path)
        return -1

    output = Path(args.output)
    output.mkdir(parents=True, exist_ok=True)

    sim_output = Path(args.sim_output)
    regions = dict()

    parse_region_subnets(sim_output, regions)

    all_requests = dict()  # service -> list of ClientRequest
    with open(all_requests_path) as f:
        reader = csv.DictReader(map_utils.skip_null_lines(f))
        for row in reader:
            ts_str = row['time_sent']
            service = row['service']
            timestamp = datetime.datetime.fromtimestamp(int(ts_str) / 1000,
                                                        tz=pytz.UTC)

            dest_ip = row['server']
            dest_region = summarize_hifi.region_for_ip(regions, dest_ip)

            service_requests = all_requests.get(service, list())
            req = ClientRequest(timestamp, dest_region)
            service_requests.append(req)
            all_requests[service] = service_requests

    window_duration = datetime.timedelta(minutes=args.window_duration)

    for service, service_requests in all_requests.items():
        process_service(output, first_timestamp, regions, window_duration,
                        service, service_requests)
Example 8
def main_method(args):
    sim_output = Path(args.sim_output)
    if not sim_output.exists():
        get_logger().error("%s does not exist", sim_output)
        return 1

    scenario_dir = Path(args.scenario)
    if not scenario_dir.exists():
        get_logger().error("%s does not exist", scenario_dir)
        return 1

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
        first_timestamp_ms = first_timestamp.timestamp() * 1000
        get_logger().info("Simulation started at %s -> %d", first_timestamp, first_timestamp_ms)
    
    output = Path(args.output)
    output.mkdir(parents=True, exist_ok=True)

    show_load_demand_attr = args.show_load_demand_attr if args.show_load_demand_attr != '' else None

    all_services = map_utils.gather_all_services(scenario_dir)
    
    with multiprocessing.Pool(processes=os.cpu_count()) as pool:
        results = list()
        for node_dir in sim_output.iterdir():
            if not node_dir.is_dir():
                continue
            node_name_dir = map_utils.find_ncp_folder(node_dir)
            if node_name_dir is None:
                get_logger().debug("No NCP folder found in %s", node_dir)
                continue
            get_logger().debug(f"node_dir: {node_dir}")
            results.append(pool.apply_async(func=process_node, args=[output, first_timestamp_ms, all_services, node_name_dir, show_load_demand_attr]))

        for result in results:
            result.wait()
Example 9
def process_client_dir(client_dir):
    unknown_host_re = re.compile(r'(?P<timestamp>\d{4}-\d{2}-\d{2}/\d{2}:\d{2}:\d{2}\.\d{3}/\S+).*Unable to find host (?P<host>\S+) after (?P<duration>\d+) ms')
    acceptable_delta = datetime.timedelta(seconds=1)
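    # A lookup that reports 0 ms is only expected shortly after a long failed
    # lookup for the same host; warn when there is no prior long failure or
    # when the last one was more than acceptable_delta ago.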

    prev_long_failure = dict() # name -> timestamp
    logfiles = sorted(client_dir.glob("map-client*.log"), key=lambda f: f.stat().st_mtime)
    with fileinput.input(files=logfiles) as f:
        for line in f:
            match = unknown_host_re.match(line)
            if match:
                timestamp = map_utils.log_timestamp_to_datetime(match.group('timestamp'))
                hostname = match.group('host')
                duration = int(match.group('duration'))
                if duration > 0:
                    prev_long_failure[hostname] = timestamp
                elif duration == 0:
                    if hostname not in prev_long_failure:
                        get_logger().warning("Lookup at %s of %s took zero time and there is no previous long lookup", timestamp, hostname)
                    else:
                        prev_timestamp = prev_long_failure[hostname]
                        diff = timestamp - prev_timestamp
                        if diff > acceptable_delta:
                            get_logger().warning("Lookup at %s of %s took zero time and the last lookup was at %s", timestamp, hostname, prev_timestamp)
Example 10
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument("-l",
                        "--logconfig",
                        dest="logconfig",
                        help="logging configuration (default: logging.json)",
                        default='logging.json')
    parser.add_argument("-c",
                        "--chart_output",
                        dest="chart_output",
                        help="Chart output directory (Required)",
                        required=True)
    parser.add_argument("-o",
                        "--output",
                        dest="output",
                        help="Output directory (Required)",
                        required=True)
    parser.add_argument(
        "--first-timestamp-file",
        dest="first_timestamp_file",
        help=
        "Path to file containing the log timestamp that the simulation started",
        required=True)
    parser.add_argument(
        "--sample-duration",
        dest="sample_duration",
        help=
        "Number of minutes between samples of plans for expected percentages plots",
        default=1,
        type=float)
    parser.add_argument("--scenario",
                        dest="scenario",
                        help="Scenario directory (Required)",
                        required=True)

    args = parser.parse_args(argv)

    map_utils.setup_logging(default_path=args.logconfig)

    chart_output = Path(args.chart_output)
    if not chart_output.exists():
        get_logger().error("%s does not exist", chart_output)
        return 1

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    get_logger().info("Simulation started at %s", first_timestamp)

    output_dir = Path(args.output)
    output_dir.mkdir(parents=True, exist_ok=True)

    dcop_file = chart_output / 'dcop_plan_updates/all_dcop_plans.csv'
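    # '?' marks missing values in the plan CSVs; convert them to 0 and sort by time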
    dcop_plans = pd.read_csv(dcop_file, na_values="?")
    dcop_plans.fillna(0, inplace=True)
    dcop_plans.sort_values(by=['time'], inplace=True)

    rlg_file = chart_output / 'rlg_plan_updates/all_rlg_overflow_plans.csv'
    rlg_plans = pd.read_csv(rlg_file, na_values="?")
    rlg_plans.fillna(0, inplace=True)
    rlg_plans.sort_values(by=['time'], inplace=True)

    dns_file = chart_output / 'dns_region_plan_updates/all_dns_region_plans.csv'
    dns_plans = pd.read_csv(dns_file, na_values="?")
    dns_plans.fillna(0, inplace=True)
    dns_plans.sort_values(by=['time'], inplace=True)

    first_timestamp_ms = first_timestamp.timestamp() * 1000
    dcop_plans['time_minutes'] = (dcop_plans['time'] -
                                  first_timestamp_ms) / 1000 / 60
    rlg_plans['time_minutes'] = (rlg_plans['time'] -
                                 first_timestamp_ms) / 1000 / 60
    dns_plans['time_minutes'] = (dns_plans['time'] -
                                 first_timestamp_ms) / 1000 / 60

    all_regions = np.union1d(dcop_plans['plan_region'].unique(),
                             rlg_plans['plan_region'].unique())
    all_services = np.union1d(dcop_plans['service'].unique(),
                              rlg_plans['service'].unique())

    for region in all_regions:
        for service in all_services:
            plot_region_service_plan_type(output_dir, "DCOP", dcop_plans,
                                          all_regions, region, service)
            plot_region_service_plan_type(output_dir, "RLG", rlg_plans,
                                          all_regions, region, service)
            plot_region_service_plan_type(output_dir, "DNS", dns_plans,
                                          all_regions, region, service)

    scenario_dir = Path(args.scenario)
    if not scenario_dir.exists():
        get_logger().error("%s does not exist", scenario_dir)
        return 1

    client_regions = compute_client_regions(scenario_dir)

    # plot expected percentages
    sample_duration = args.sample_duration
    for source_region in client_regions:
        for service in all_services:
            times, region_weights = compute_expected(sample_duration,
                                                     all_regions, 'DCOP',
                                                     dcop_plans, source_region,
                                                     service)
            plot_expected(output_dir, 'DCOP', source_region, service, times,
                          region_weights)
            times, region_weights = compute_expected(sample_duration,
                                                     all_regions, 'RLG',
                                                     rlg_plans, source_region,
                                                     service)
            plot_expected(output_dir, 'RLG', source_region, service, times,
                          region_weights)
            times, region_weights = compute_expected(sample_duration,
                                                     all_regions, 'DNS',
                                                     dns_plans, source_region,
                                                     service)
            plot_expected(output_dir, 'DNS', source_region, service, times,
                          region_weights)

    get_logger().info("Finished")
Example 11
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument("-l",
                        "--logconfig",
                        dest="logconfig",
                        help="logging configuration (default: logging.json)",
                        default='logging.json')
    parser.add_argument("-s",
                        "--sim-output",
                        dest="sim_output",
                        help="Chart output directory (Required)",
                        required=True)
    parser.add_argument("-o",
                        "--output",
                        dest="output",
                        help="Output directory (Required)",
                        required=True)
    parser.add_argument(
        "--first-timestamp-file",
        dest="first_timestamp_file",
        help=
        "Path to file containing the log timestamp that the simulation started",
        required=True)

    args = parser.parse_args(argv)

    map_utils.setup_logging(default_path=args.logconfig)

    sim_output = Path(args.sim_output)
    if not sim_output.exists():
        get_logger().error("%s does not exist", sim_output)
        return 1

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    get_logger().info("Simulation started at %s", first_timestamp)
    first_timestamp_ms = first_timestamp.timestamp() * 1000

    output = Path(args.output)
    output.mkdir(parents=True, exist_ok=True)

    # app -> node_attr -> container_name -> timestamp -> value
    data_load = dict()
    data_demand = dict()
    for node_dir in sim_output.iterdir():
        if not node_dir.is_dir():
            continue

        get_logger().debug("Processing node %s", node_dir)
        agent_dir = node_dir / 'agent'
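        # some runs nest the time directories under an 'agent' folder; otherwise
        # they sit directly under the node directory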

        if agent_dir.is_dir():
            get_logger().debug("\tAgent dir %s", agent_dir)
            for node_name_dir in agent_dir.iterdir():
                if not node_name_dir.is_dir():
                    continue

                for time_dir in node_name_dir.iterdir():
                    if not time_dir.is_dir():
                        continue

                    add_time_dir_data(time_dir, data_load, data_demand)
        else:
            for time_dir in node_dir.iterdir():
                if not time_dir.is_dir():
                    continue

                add_time_dir_data(time_dir, data_load, data_demand)

    output_graphs(first_timestamp_ms, output, data_load, "load")
    output_graphs(first_timestamp_ms, output, data_demand, "demand")
Example 12
def main_method(args):
    sim_output = Path(args.sim_output)
    if not sim_output.exists():
        get_logger().error("%s does not exist", sim_output)
        return 1

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    first_timestamp_ms = first_timestamp.timestamp() * 1000
    get_logger().info("Simulation started at %s -> %d", first_timestamp,
                      first_timestamp_ms)

    output = Path(args.output) / 'rlg-resource-report-lag'
    output.mkdir(parents=True, exist_ok=True)

    for node_dir in sim_output.iterdir():
        max_diff = None
        if not node_dir.is_dir():
            continue

        get_logger().debug("Processing node %s", node_dir)
        node_name_dir = map_utils.find_ncp_folder(node_dir)
        if node_name_dir is None:
            get_logger().debug("No NCP folder found")
            continue
        ncp = map_utils.node_name_from_dir(node_name_dir)

        all_known_nodes = set()
        xs = list()
        ys = dict()  # report node -> series of diffs
        for time_dir in sorted(node_name_dir.iterdir()):
            if not time_dir.is_dir():
                continue

            directory_time = int(time_dir.stem)
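            # time directory names are assumed to be epoch timestamps in milliseconds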

            get_logger().debug("\t\tProcessing time %s", time_dir)
            resource_report_file = time_dir / 'regionResourceReports-SHORT.json'
            if resource_report_file.exists():
                try:
                    with open(resource_report_file, 'r') as f:
                        resource_reports = json.load(f)
                except json.decoder.JSONDecodeError:
                    get_logger().warning("Problem reading %s, skipping",
                                         resource_report_file)
                    continue

                xs.append(directory_time)

                seen_nodes = set()
                for resource_report in resource_reports:
                    report_time = int(resource_report['timestamp'])
                    if report_time < 1:
                        # skip reports that we don't have a rational time for
                        diff = None
                    else:
                        diff = map_utils.timestamp_to_seconds(directory_time -
                                                              report_time)
                    node = resource_report['nodeName']['name']

                    if node in seen_nodes:
                        get_logger().warning(
                            "Saw multiple reports from %s in %s, skipping the second one",
                            node, time_dir)
                        continue

                    seen_nodes.add(node)
                    all_known_nodes.add(node)

                    # the default value is set up to ensure that newly discovered
                    # nodes have a list of values the same length as the other lists
                    node_series = ys.get(node, [None] * (len(xs) - 1))
                    node_series.append(diff)
                    ys[node] = node_series
                    get_logger().debug("Added %s to %s xs: %d node_series: %d",
                                       diff, node, len(xs), len(node_series))

                    if diff is not None and (max_diff is None or diff > max_diff):
                        max_diff = diff

                # make sure we skip values for any nodes that we should have seen
                missing_nodes = all_known_nodes - seen_nodes
                if len(missing_nodes) > 0:
                    get_logger().debug("Missing nodes: %s Seen: %s",
                                       missing_nodes, seen_nodes)
                    for missing_node in missing_nodes:
                        # the key must exist by now
                        node_series = ys[missing_node]
                        node_series.append(None)
                        ys[missing_node] = node_series
                        get_logger().debug("Added None to %s", missing_node)

                for node, node_series in ys.items():
                    if len(xs) != len(node_series):
                        raise RuntimeError(
                            f"List sizes not correct for {node} {len(xs)} != {len(node_series)}"
                        )

        if len(xs) > 0:
            get_logger().info("Maximum diff for %s is %s ms", ncp, max_diff)
            output_graph(output, ncp, xs, ys, first_timestamp_ms)
Example 13
def main_method(args):
    sim_output = Path(args.sim_output)
    if not sim_output.exists():
        get_logger().error("%s does not exist", sim_output)
        return 1

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)

    earliest_time = first_timestamp.timestamp() * 1000
    
    output_dir = Path(args.output)
    output_dir.mkdir(parents=True, exist_ok=True)

    # read all of the data in
    client_frames = list()
    server_frames = list()
    with multiprocessing.Pool(processes=os.cpu_count()) as pool:
        client_results = list()
        server_results = list()
        for node_dir in sim_output.iterdir():
            if not node_dir.is_dir():
                continue

            client_metrics_dir = node_dir / 'client/container_data'
            if client_metrics_dir.exists() and client_metrics_dir.is_dir():
                result = pool.apply_async(func=process_client, args=[node_dir.name, client_metrics_dir])
                client_results.append(result)

            server_container_data = node_dir / 'agent/container_data'
            if server_container_data.exists() and server_container_data.is_dir():
                result = pool.apply_async(func=process_server, args=[node_dir.name, server_container_data])
                server_results.append(result)

        for result in client_results:
            frames = result.get()
            client_frames.extend(frames)

        for result in server_results:
            (s_frames, c_frames) = result.get()
            server_frames.extend(s_frames)
            client_frames.extend(c_frames)
            
    if len(client_frames) > 0:
        client_data = pd.concat(client_frames, ignore_index=True)
        client_data['time_minutes'] = (client_data['time_ack_received'] - earliest_time) / 1000 / 60
        client_data['latency_seconds'] = client_data['latency'] / 1000
        all_clients = client_data['client'].unique()

        get_logger().debug("Client services {}".format(client_data['service'].unique()))

        with open(output_dir / 'all_client_requests.csv', 'w') as f:
            client_data.to_csv(f, index=False)
    else:
        get_logger().warning("No client data")

    if len(server_frames) > 0:
        server_data = pd.concat(server_frames, ignore_index=True)

        server_data['time_minutes'] = (server_data['timestamp'] - earliest_time) / 1000 / 60
        server_data['latency_seconds'] = server_data['latency'] / 1000

        get_logger().debug(server_data.head())
        get_logger().debug("Server services {}".format(server_data['service'].unique()))
    else:
        get_logger().warning("No server data")


    if len(server_frames) < 1 or len(client_frames) < 1:
        get_logger().error("Need both client and server data to write out remaining files, skipping remaining outputs")
        return 0
        
    all_services = np.unique(np.concatenate([client_data['service'].unique(), server_data['service'].unique()], axis=0))

    get_logger().debug("all services {}".format(all_services))

    write_client_data(output_dir, all_clients, all_services, client_data)
    write_per_client(output_dir, all_services, all_clients, client_data)

    write_server_data(output_dir, all_services, server_data)

    write_combined(output_dir, all_services, server_data, client_data)
    write_combined_per_service(output_dir, all_services, server_data, client_data)
Example 14
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument("-l",
                        "--logconfig",
                        dest="logconfig",
                        help="logging configuration (default: logging.json)",
                        default='logging.json')
    parser.add_argument(
        "--first-timestamp-file",
        dest="first_timestamp_file",
        help=
        "Path to file containing the log timestamp that the simulation started",
        required=True)
    parser.add_argument("-s",
                        "--sim-output",
                        dest="sim_output",
                        help="Chart output directory (Required)",
                        required=True)
    parser.add_argument("-o",
                        "--output",
                        dest="output",
                        help="Output directory (Required)",
                        required=True)
    parser.add_argument("-c",
                        "--container-capacity",
                        dest="container_queue_length_capacity",
                        help="QueueLength capacity per container",
                        default=None)
    parser.add_argument(
        "--process-latency-data",
        dest="process_latency_data",
        action='store_true',
        help=
        "If present, show queue length estimate from processing_latency.csv files."
    )

    args = parser.parse_args(argv)

    map_utils.setup_logging(default_path=args.logconfig)

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    first_timestamp_ms = first_timestamp.timestamp() * 1000

    get_logger().info("Run started at %s", first_timestamp)

    sim_output = Path(args.sim_output)
    if not sim_output.exists():
        get_logger().error("%s does not exist", sim_output)
        return 1

    output = Path(args.output)
    output.mkdir(parents=True, exist_ok=True)

    container_queue_length_capacity = int(
        args.container_queue_length_capacity
    ) if args.container_queue_length_capacity else None

    process_latency_data = args.process_latency_data

    # app -> node_attr -> container_name -> timestamp -> value
    data_load = dict()
    data_demand = dict()
    container_processing_latency = dict()
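    # artifact -> container name -> list of per-instance dicts mapping
    # timestamp to +1 (request received) / -1 (ack sent)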

    for node_dir in sim_output.iterdir():
        if not node_dir.is_dir():
            continue

        get_logger().debug("Processing node %s", node_dir)
        agent_dir = node_dir / 'agent'
        if not agent_dir.is_dir():
            continue

        get_logger().debug("\tAgent dir %s", agent_dir)
        for node_name_dir in agent_dir.iterdir():
            if not node_name_dir.is_dir():
                continue

            for time_dir in node_name_dir.iterdir():
                if not time_dir.is_dir():
                    continue

                get_logger().debug("\t\tProcessing time %s", time_dir)
                resource_report_file = time_dir / 'resourceReport-SHORT.json'
                if resource_report_file.exists():
                    try:
                        with open(resource_report_file, 'r') as f:
                            resource_report = json.load(f)
                        time = int(time_dir.stem)
                        if 'containerReports' in resource_report:
                            for container_name, container_report in resource_report[
                                    'containerReports'].items():
                                container_name_short = container_name.split(
                                    '.')[0]
                                process_container_report(
                                    data_load, data_demand,
                                    container_name_short, time,
                                    container_report)
                    except json.decoder.JSONDecodeError:
                        get_logger().warning("Problem reading %s, skipping",
                                             resource_report_file)

        container_data_dir = agent_dir / 'container_data'

        if process_latency_data:
            for container_data_service_dir in container_data_dir.iterdir():
                match = re.search(r'^(.+)\.([^\._]+)_(.+)$',
                                  container_data_service_dir.name)
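                # expected directory name form: <prefix>.<artifact>_<suffix>;
                # group(2) is taken as the service artifact name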

                if match:
                    artifact = match.group(2)
                    for container_data_service_name_dir in container_data_service_dir.iterdir(
                    ):
                        match = re.search(r'^([^\.]+)\.map\.dcomp$',
                                          container_data_service_name_dir.name)

                        if match:
                            container_name = match.group(1)
                            for container_data_service_name_instance_dir in container_data_service_name_dir.iterdir(
                            ):
                                processing_latency_file = container_data_service_name_instance_dir / 'app_metrics_data' / 'processing_latency.csv'

                                container_instance_latency_data = dict()

                                with open(processing_latency_file, "r") as f:
                                    reader = csv.DictReader(f)

                                    for row in reader:
                                        event = row['event']
                                        time_start = int(row['time_received'])
                                        time_end = int(row['time_ack_sent'])
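                                        # each successful request contributes +1
                                        # at time_received and -1 at time_ack_sent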

                                        if event == 'request_success':
                                            container_instance_latency_data[
                                                time_start] = 1
                                            container_instance_latency_data[
                                                time_end] = -1

                                container_processing_latency.setdefault(
                                    artifact, dict()).setdefault(
                                        container_name, list()).append(
                                            container_instance_latency_data)

    container_expected_queue_lengths = accumulate_expected_queue_length(
        container_processing_latency)
    output_graphs(first_timestamp_ms, output, data_load,
                  container_expected_queue_lengths, "load",
                  container_queue_length_capacity)
Example 15
def main_method(args):
    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    get_logger().info("Simulation started at %s", first_timestamp)
    
    sim_output = Path(args.sim_output)
    if not sim_output.exists():
        get_logger().error("%s does not exist", sim_output)
        return 1

    scenario_dir = Path(args.scenario)
    if not scenario_dir.exists():
        get_logger().error("%s does not exist", scenario_dir)
        return 1
    
    output = Path(args.output)
    output.mkdir(parents=True, exist_ok=True)

    all_apps = map_utils.gather_all_services(scenario_dir)

    # container_name -> ncp_name
    container_node = dict()
    
    # node_attr -> app -> ncp_name -> timestamp -> value
    data_load = dict() 
    data_demand_short = dict()
    data_demand_long = dict()
    # node_attr -> app -> container_name -> timestamp -> value
    data_capacity = dict()
    for node_dir in sim_output.iterdir():
        if not node_dir.is_dir():
            continue
        process_node(first_timestamp, all_apps, data_load, data_demand_short, data_demand_long, data_capacity, node_dir, container_node)

    results = list()

    ## process raw data
    get_logger().info("Processing raw data")

    # ncp_name -> region_name
    node_region = map_utils.gather_region_info(scenario_dir)
    get_logger().debug("node_region: %s", node_region)

    get_logger().info("Summing values by region")
    data_load_per_region = sum_values_by_region(data_load, node_region)
    data_demand_short_per_region = sum_values_by_region(data_demand_short, node_region)

    data_demand_long_per_region = sum_values_by_region(data_demand_long, node_region)
    capacity_per_region = sum_capacity_values_by_region(data_capacity, node_region, container_node)
    del container_node
    del node_region

    get_logger().info("Summing load")
    #get_logger().debug("Data load: %s", data_load)
    # node_attr -> app -> timestamp -> value
    data_load_summed = sum_values(data_load)
    #get_logger().debug("Data load summed: %s", data_load_summed)
    del data_load

    get_logger().info("Summing demand")
    # node_attr -> app -> timestamp -> value
    data_demand_short_summed = sum_values(data_demand_short)
    data_demand_long_summed = sum_values(data_demand_long)
    del data_demand_short
    del data_demand_long

    #get_logger().debug("Data capacity: %s", data_capacity)
    # node_attr -> app -> timestamp -> value
    data_capacity_summed = sum_values(data_capacity)
    #get_logger().debug("Data capacity summed: %s", data_capacity_summed)
    del data_capacity

    get_logger().info("Starting graph creation")

    output_graphs_per_region(first_timestamp, output, capacity_per_region, data_load_per_region, 'load')
    output_graphs_per_region(first_timestamp, output, capacity_per_region, data_demand_short_per_region, 'demand-short')
    output_graphs_per_region(first_timestamp, output, capacity_per_region, data_demand_long_per_region, 'demand-long')
    output_capacity_graph_per_region(first_timestamp, output, capacity_per_region)

    for app in all_apps:
        output_region_graphs_for_app(first_timestamp, output, capacity_per_region, data_load_per_region, 'load', app)
        output_region_graphs_for_app(first_timestamp, output, capacity_per_region, data_demand_short_per_region, 'demand-short', app)
        output_region_graphs_for_app(first_timestamp, output, capacity_per_region, data_demand_long_per_region, 'demand-long', app)
        output_region_capacity_graph_for_app(first_timestamp, output, capacity_per_region, app)


    output_graphs(first_timestamp, output, data_capacity_summed, data_load_summed, "load")
    output_graphs(first_timestamp, output, data_capacity_summed, data_demand_short_summed, "demand-short")
    output_graphs(first_timestamp, output, data_capacity_summed, data_demand_long_summed, "demand-long")
    output_capacity_graph(first_timestamp, output, data_capacity_summed)

    get_logger().info("Finished")
Example 16
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

    class ArgumentParserWithDefaults(argparse.ArgumentParser):
        '''
        From https://stackoverflow.com/questions/12151306/argparse-way-to-include-default-values-in-help
        '''
        def add_argument(self, *args, help=None, default=None, **kwargs):
            if help is not None:
                kwargs['help'] = help
            if default is not None and args[0] != '-h':
                kwargs['default'] = default
                if help is not None:
                    kwargs['help'] += ' (default: {})'.format(default)
            super().add_argument(*args, **kwargs)

    parser = ArgumentParserWithDefaults(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("-l",
                        "--logconfig",
                        dest="logconfig",
                        help="logging configuration (default: logging.json)",
                        default='logging.json')
    parser.add_argument("--debug",
                        dest="debug",
                        help="Enable interactive debugger on error",
                        action='store_true')
    parser.add_argument("--input",
                        dest="input",
                        help="Path to all_client_requests.csv",
                        required=True)
    parser.add_argument(
        "--first-timestamp-file",
        dest="first_timestamp_file",
        help=
        "Path to file containing the log timestamp that the simulation started",
        required=True)
    parser.add_argument("-o",
                        "--output",
                        dest="output",
                        help="Output directory",
                        required=True)
    parser.add_argument("-s",
                        "--sim-output",
                        dest="sim_output",
                        help="Sim output directory (Required)",
                        required=True)
    parser.add_argument(
        "--window-duration",
        dest="window_duration",
        help=
        "Number of minutes to use for the window to compute percentages over",
        default=1,
        type=float)

    args = parser.parse_args(argv)
    map_utils.setup_logging(default_path=args.logconfig)

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)

    get_logger().info("Simulation started at %s", first_timestamp)

    all_requests_path = Path(args.input)
    if not all_requests_path.exists():
        get_logger().error("Cannot read all requests file at %s",
                           all_requests_path)
        return -1

    output = Path(args.output)
    output.mkdir(parents=True, exist_ok=True)

    sim_output = Path(args.sim_output)
    regions = dict()

    window_duration = datetime.timedelta(minutes=args.window_duration)

    scenario_dir = sim_output / 'inputs' / 'scenario'
    node_to_region_dict = map_utils.gather_region_info(scenario_dir)
    _, ip_to_node_dict = summarize_hifi.parse_node_info(sim_output)

    get_logger().debug(f"node_to_region_dict = {node_to_region_dict}")
    get_logger().debug(f"ip_to_node_dict = {ip_to_node_dict}")

    # region --> service --> window --> ClientDnsRequestCounts
    requests_region_service_counts = dict()

    read_all_requests_file(all_requests_path, first_timestamp, window_duration,
                           node_to_region_dict, requests_region_service_counts)
    read_weighted_dns_files(sim_output, first_timestamp, window_duration,
                            ip_to_node_dict, requests_region_service_counts)

    output_requests_csv_per_region(requests_region_service_counts,
                                   first_timestamp, output)
    output_requests_graph_per_region(requests_region_service_counts,
                                     first_timestamp, output)
Example 17
def main(argv=None):

    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument("-l",
                        "--logconfig",
                        dest="logconfig",
                        help="logging configuration (default: logging.json)",
                        default='logging.json')
    parser.add_argument("-L",
                        "--log-file",
                        dest="log_file_path",
                        help="Log file with timestamps",
                        required=True)
    parser.add_argument(
        "-b",
        "--testbed",
        dest="testbed",
        help=
        "The testbed used for the run. This is used to obtain time zone information.",
        default='')
    parser.add_argument("-s",
                        "--run-seconds-start",
                        dest="run_seconds_start",
                        help="Number of seconds into run to start inspection",
                        default='0')
    parser.add_argument(
        "-i",
        "--interval",
        dest="run_seconds_interval",
        help="Interval number of seconds within run to inspect",
        default='')
    parser.add_argument(
        "-d",
        "--identify-log-delays",
        dest="log_warn_delay",
        help=
        "Print a warning between consecutive log statements separated by a delay larger than the value specified in seconds.",
        default='')
    parser.add_argument("-f",
                        "--regex-filter",
                        dest="regex_filter",
                        help="Pattern to use for filtering of log statements",
                        default='')
    parser.add_argument(
        "-t",
        "--show-run-seconds",
        dest="show_run_seconds",
        action="store_true",
        help="Adds the number of seconds into the run to each log statement",
        default='')
    parser.add_argument("-o",
                        "--output-file",
                        dest="log_output_file_path",
                        help="Log statements output from inspection interval",
                        default='')
    parser.add_argument(
        "-p",
        "--occurrence-plot",
        dest="occurrence_plot_output_path",
        help="Create a graph showing occurrences of log messages in time",
        default='')
    parser.add_argument(
        "--first-timestamp-file",
        dest="first_timestamp_file",
        help=
        "Path to file containing the log timestamp that the simulation started",
        required=True)

    args = parser.parse_args(argv)

    map_utils.setup_logging(default_path=args.logconfig)

    # parse arguments
    log_file_path = args.log_file_path
    testbed = (None if args.testbed == '' else args.testbed)
    run_seconds_start = float(args.run_seconds_start)
    run_seconds_interval = (None if args.run_seconds_interval == '' else float(
        args.run_seconds_interval))
    log_warn_delay = (None if args.log_warn_delay == '' else timedelta(
        seconds=float(args.log_warn_delay)))
    regex_filter = (None if args.regex_filter == '' else re.compile(
        args.regex_filter))
    show_run_seconds = args.show_run_seconds
    log_output_file_path = args.log_output_file_path
    occurrence_plot_output_path = (None if args.occurrence_plot_output_path
                                   == '' else args.occurrence_plot_output_path)

    with open(args.first_timestamp_file) as f:
        ts_str = f.readline().strip()
        first_timestamp = map_utils.log_timestamp_to_datetime(ts_str)
    get_logger().info("Simulation started at %s", first_timestamp)

    time_zone = None

    #testbed_to_time_zone_map = {'emulab' : tz.tzoffset('MDT', -21600), 'dcomp' : tz.tzoffset('UTC', 0)}
    #testbed_to_time_zone_map = {'emulab' : timezone(-timedelta(hours=6), 'mdt'), 'dcomp' : timezone.utc}
    testbed_to_time_zone_map = {
        'emulab': timezone('US/Mountain'),
        'dcomp': timezone('UTC')
    }
    if (testbed is not None):
        if (testbed.lower() in testbed_to_time_zone_map):
            time_zone = testbed_to_time_zone_map[testbed.lower()]
            get_logger().info("Found time zone for testbed '%s': %s.", testbed,
                              time_zone)
        else:
            get_logger().fatal("Could not map testbed '%s' to timezone.",
                               testbed)
            exit(1)

    get_logger().info("Log File: %s", log_file_path)

    with open(log_file_path, "r") as log_file:
        log_line_occurrence_times = list()

        ref_time = first_timestamp

        get_logger().info("  Ref time String: %s",
                          map_utils.datetime_to_string(ref_time))
        get_logger().info("  Ref time epoch ms: %s",
                          str(datetime_to_epoch_ms(ref_time)))

        get_logger().info("")
        get_logger().info("")

        start_time_within_run = ref_time + timedelta(seconds=run_seconds_start)
        get_logger().info("At %s seconds into run: ", str(run_seconds_start))
        get_logger().info("  Time within run: %s",
                          map_utils.datetime_to_string(start_time_within_run))
        get_logger().info("    Epoch ms: %s",
                          str(datetime_to_epoch_ms(start_time_within_run)))

        get_logger().info("")

        end_time_within_run = (None if run_seconds_interval == None else (
            ref_time +
            timedelta(seconds=(run_seconds_start + run_seconds_interval))))

        if (end_time_within_run != None):
            get_logger().info("At " +
                              str(run_seconds_start + run_seconds_interval) +
                              " seconds into run: ")
            get_logger().info(
                "  Time within run: %s",
                map_utils.datetime_to_string(end_time_within_run))
            get_logger().info("    Epoch ms: %s",
                              str(datetime_to_epoch_ms(end_time_within_run)))

        get_logger().info("\n\n\n")

        get_logger().info(
            "<==================== Log Output ====================>\n")

        if (log_output_file_path != ''):
            log_output_file = open(log_output_file_path, "w")
            log_output_file.write("Log statements for run time range \n")
            log_output_file.write("  Start Time (s):  " +
                                  str(run_seconds_start) + "\n")

            if (run_seconds_interval != None):
                log_output_file.write("  End Time   (s):  " +
                                      str(run_seconds_start +
                                          run_seconds_interval) + "\n")

            log_output_file.write("\n")

            if (log_warn_delay != None):
                log_output_file.write(
                    "  Max time delay without warning (s):  " +
                    str(log_warn_delay) + "\n")

            log_output_file.write(
                "====================================================================================================\n\n\n\n"
            )
        else:
            log_output_file = None

        log_lines_printed = 0
        prev_line = ''
        prev_line_time = None

        last_line_matches = False
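        # scan the log, echoing lines that fall inside the requested time window
        # (and match the optional regex filter) to the optional output file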

        for line in log_file:
            output_line = line

            line_time = map_utils.log_line_to_time(line, time_zone)

            if (line_time != None):
                if (line_time >= start_time_within_run and
                    (regex_filter == None or re.search(regex_filter, line))):
                    line_prefix = ""
                    if (show_run_seconds):
                        line_run_seconds = (line_time - ref_time).seconds
                        output_line = str(
                            line_run_seconds) + ":   " + output_line

                    if (log_warn_delay != None and prev_line_time != None):
                        log_delay = line_time - prev_line_time
                        if (log_output_file != None
                                and log_delay > log_warn_delay):
                            log_output_file.write("<--- Time delay " +
                                                  str(log_delay) + " > " +
                                                  str(log_warn_delay) +
                                                  " --->\n")

                    if (log_lines_printed < 1):
                        get_logger().info(line)
                    if (log_output_file != None):
                        log_output_file.write(output_line)
                        record_output_line_time(ref_time, line_time,
                                                log_line_occurrence_times)

                    prev_line = line
                    prev_line_time = line_time
                    log_lines_printed += 1
                    last_line_matches = True
                else:
                    last_line_matches = False

                if (end_time_within_run != None
                        and line_time > end_time_within_run):
                    break
            else:
                # write statements without a valid timestamp if and only if the most recent line with a valid time was printed;
                # this allows multiline log statements such as exceptions to be printed entirely
                if (last_line_matches):
                    if (log_output_file != None):
                        log_output_file.write(output_line)

        get_logger().info("  ......")
        get_logger().info(prev_line)

        # close the output file if it was specified
        if (log_output_file != None):
            log_output_file.close()

        if (occurrence_plot_output_path != None):
            if (regex_filter == None):
                title = "Times of all log lines"
            else:
                title = "Times of log lines matching regex '" + regex_filter.pattern + "'"

            plot_log_line_occurrence(occurrence_plot_output_path, title,
                                     log_line_occurrence_times)