def aggregate_servers_correlations(dt_start, dt_end, metric, servers):
    """Merge the per-server zero-indegree correlation CSVs into one file.

    For each server, the first traceroute type accepted by ``valid_graph``
    is selected and its ``problem_location_zero_indegree_vertexes_
    correlation.csv`` (under ``plots/names``) is read. Every row is copied
    to the aggregated CSV under ``prints`` with the server name prepended.
    Once written, the aggregated file is sorted in place through
    ``utils.sort_csv_file`` on ``cnt_vertexes_with_zero_indegree``
    (``ascending=False``) and then ``server`` (``ascending=True``).

    Args:
        dt_start: start of the analysed period (forwarded to helpers).
        dt_end: end of the analysed period (forwarded to helpers).
        metric: metric name, used as a directory component.
        servers: iterable of server names to aggregate.
    """
    str_dt = utils.get_str_dt(dt_start, dt_end)

    header = ("server,traceroute_type,cp_dt_start,cp_dt_end,cp_type,"
              "cnt_vertexes_with_zero_indegree,suffix_match,"
              "vertexes_with_zero_indegree\n")
    # The last two fields are wrapped in double quotes in the output.
    line_template = "{},{},{},{},{},{},\"{}\",\"{}\"\n"
    copied_columns = ["traceroute_type",
                      "cp_dt_start",
                      "cp_dt_end",
                      "cp_type",
                      "cnt_vertexes_with_zero_indegree",
                      "suffix_match",
                      "vertexes_with_zero_indegree"]

    out_path = ("{}/prints/{}/filtered/{}/"
                "problem_location_zero_indegree_vertexes_correlation.csv".
                format(script_dir, str_dt, metric))
    with open(out_path, "w") as out_file:
        out_file.write(header)
        for server in servers:
            for traceroute_type in unsupervised_utils.iter_traceroute_types():
                if not valid_graph(dt_start, dt_end, server,
                                   traceroute_type):
                    continue

                in_path = ("{}/plots/names/{}/{}/{}/{}/"
                           "problem_location_zero_indegree_vertexes_"
                           "correlation.csv".
                           format(script_dir, str_dt, metric,
                                  traceroute_type, server))
                for _, row in pd.read_csv(in_path).iterrows():
                    values = [row[column] for column in copied_columns]
                    out_file.write(line_template.format(server, *values))
                # Only the first valid traceroute type is used per server.
                break

    utils.sort_csv_file(out_path,
                        ["cnt_vertexes_with_zero_indegree", "server"],
                        ascending=[False, True])
def aggregate_servers_first_hop_not_zero_indegree_vertex(dt_start, dt_end,
                                                         metric, servers):
    """Merge the per-server "first hop not zero indegree" CSVs into one.

    For each server, the first traceroute type accepted by ``valid_graph``
    is selected and its ``problem_location_first_hop_not_zero_indegree_
    vertex.csv`` (under ``plots/paths``) is read. Every row is copied to
    the aggregated CSV under ``prints`` with the server name prepended.

    Args:
        dt_start: start of the analysed period (forwarded to helpers).
        dt_end: end of the analysed period (forwarded to helpers).
        metric: metric name, used as a directory component.
        servers: iterable of server names to aggregate.
    """
    str_dt = utils.get_str_dt(dt_start, dt_end)

    header = ("server,cp_dt_start,cp_dt_end,cp_type,fraction_of_clients,"
              "cnt_clients,clients,problem_location\n")
    # The last two fields are wrapped in double quotes in the output.
    line_template = "{},{},{},{},{},{},\"{}\",\"{}\"\n"
    copied_columns = ["cp_dt_start",
                      "cp_dt_end",
                      "cp_type",
                      "fraction_of_clients",
                      "cnt_clients",
                      "clients",
                      "problem_location"]

    out_path = ("{}/prints/{}/filtered/{}/"
                "problem_location_first_hop_not_zero_indegree_vertex.csv".
                format(script_dir, str_dt, metric))
    with open(out_path, "w") as out_file:
        out_file.write(header)

        for server in servers:
            for traceroute_type in unsupervised_utils.iter_traceroute_types():
                if not valid_graph(dt_start, dt_end, server,
                                   traceroute_type):
                    continue

                in_path = ("{}/plots/paths/{}/{}/{}/{}/"
                           "problem_location_first_hop_not_zero_indegree_"
                           "vertex.csv".
                           format(script_dir, str_dt, metric,
                                  traceroute_type,
                                  server))
                for _, row in pd.read_csv(in_path).iterrows():
                    values = [row[column] for column in copied_columns]
                    out_file.write(line_template.format(server, *values))
                # Only the first valid traceroute type is used per server.
                break
Ejemplo n.º 3
0
def basic_statistics_per_batch():
    """Write one summary line per batch directory found under ``prints``.

    Every sub-directory of ``<script_dir>/prints`` is treated as a batch
    (its name is used as ``str_dt``). The batch's
    ``filtered/traceroute_per_mac.csv`` is scanned and three counters are
    written to ``prints/basic_statistics_per_batch.csv``:

    * ``cnt_servers``: distinct ``server`` values among valid clients,
    * ``cnt_valid_clients``: rows with a truthy ``valid_cnt_samples`` and
      a truthy valid-traceroute field for at least one traceroute type,
    * ``cnt_clients``: total number of rows.
    """
    prints_dir = "{}/prints".format(script_dir)
    out_path = "{}/prints/basic_statistics_per_batch.csv".format(script_dir)
    line_template = "{},{},{},{}\n"

    with open(out_path, "w") as out_file:
        out_file.write("str_dt,cnt_servers,cnt_valid_clients,cnt_clients\n")

        for str_dt in os.listdir(prints_dir):
            if not os.path.isdir("{}/prints/{}".format(script_dir, str_dt)):
                continue

            seen_servers = set()
            total_clients = 0
            valid_clients = 0

            csv_path = (
                "{}/prints/{}/filtered/traceroute_per_mac.csv".format(
                    script_dir, str_dt))
            for _, row in pd.read_csv(csv_path).iterrows():
                total_clients += 1
                for traceroute_type in \
                        unsupervised_utils.iter_traceroute_types():
                    valid_field, _unused_field = \
                        cp_utils.get_traceroute_fields(traceroute_type)

                    if not (row["valid_cnt_samples"] and row[valid_field]):
                        continue
                    # A client is counted once, at its first valid type.
                    seen_servers.add(row["server"])
                    valid_clients += 1
                    break

            out_file.write(line_template.format(str_dt, len(seen_servers),
                                                valid_clients,
                                                total_clients))
def process_graphs(dt_start, dt_end):
    """Build, write and check one graph per (traceroute type, server).

    The distinct servers are read from the batch's
    ``filtered/traceroute_per_mac.csv``. For every traceroute type and
    server, the graph returned by ``get_graph`` is written to
    ``filtered/graph/<server>/<traceroute_field>_graph.gv`` with
    ``write_graph`` and then passed to ``check_graph``.

    Args:
        dt_start: start of the analysed period.
        dt_end: end of the analysed period.
    """
    str_dt = utils.get_str_dt(dt_start, dt_end)

    graph_root = "{}/prints/{}/filtered/graph/".format(script_dir, str_dt)
    utils.create_dirs([graph_root])

    csv_path = "{}/prints/{}/filtered/traceroute_per_mac.csv".format(
        script_dir, str_dt)
    servers = np.unique(pd.read_csv(csv_path)["server"].values)

    for traceroute_type in unsupervised_utils.iter_traceroute_types():
        valid_traceroute_field, traceroute_field = \
            cp_utils.get_traceroute_fields(traceroute_type)

        for server in servers:
            server_dir = "{}/prints/{}/filtered/graph/{}".format(
                script_dir, str_dt, server)
            utils.create_dirs([graph_root, server_dir])

            gv_path = "{}/{}_graph.gv".format(server_dir, traceroute_field)

            name_neigh = get_graph(dt_start, dt_end, valid_traceroute_field,
                                   traceroute_field, server)
            write_graph(gv_path, name_neigh)
            check_graph(server_dir, name_neigh, traceroute_field)
def plot_latencies_traceroute(dt_start, dt_end, preprocess_args):
    """Plot the preprocessed latency series of every traceroute hop.

    Rows of the batch's ``filtered/traceroute_per_mac.csv`` with a truthy
    ``valid_cnt_samples`` are processed. For each traceroute type whose
    valid field is truthy for the row, one PNG per hop (all hops except
    the last entry of the traceroute) is written under
    ``plots/paths/<str_dt>/latency/<type>/<server>/<reversed hops>/
    traceroute_latencies/<mac>/hopNN_<name>.png``.

    Args:
        dt_start: start of the analysed period.
        dt_end: end of the analysed period.
        preprocess_args: forwarded to ``cp_utils.preprocess``.
    """
    str_dt = utils.get_str_dt(dt_start, dt_end)

    csv_path = "{}/prints/{}/filtered/traceroute_per_mac.csv".format(
        script_dir, str_dt)
    for _, row in pd.read_csv(csv_path).iterrows():
        if not row["valid_cnt_samples"]:
            continue

        ts_in_path = utils.get_in_path(row["server"], row["mac"], dt_start,
                                       dt_end)
        ts_traceroute = TimeSeries(in_path=ts_in_path, metric="traceroute",
                                   dt_start=dt_start, dt_end=dt_end)

        for traceroute_type in unsupervised_utils.iter_traceroute_types():
            valid_traceroute_field, traceroute_field = \
                cp_utils.get_traceroute_fields(traceroute_type)
            if not row[valid_traceroute_field]:
                continue

            # The traceroute is stored in the CSV as a Python literal.
            traceroute = ast.literal_eval(row[traceroute_field])
            name_ts = get_ts_per_name(traceroute_type, ts_traceroute,
                                      dt_start, dt_end)

            # Directory path mirrors the traceroute in reverse hop order.
            reversed_hops = "/".join(str(hop)
                                     for hop in reversed(traceroute))
            dir_path = "{}/plots/paths/{}/{}/{}/{}/{}".format(
                script_dir, str_dt, "latency", traceroute_type,
                row["server"], reversed_hops)

            mac_dir = "{}/traceroute_latencies/{}".format(dir_path,
                                                          row["mac"])
            utils.create_dirs(["{}/traceroute_latencies/".format(dir_path),
                               mac_dir])

            # The last traceroute entry is intentionally not plotted.
            for hop_idx in range(len(traceroute) - 1):
                name = traceroute[hop_idx][0][0]
                file_stem = "hop{}_{}".format(str(hop_idx).zfill(2), name)
                out_path = "{}/{}.png".format(mac_dir, file_stem)

                ts_preprocessed = name_ts[name].copy()
                cp_utils.preprocess(ts_preprocessed, preprocess_args)

                ts_preprocessed.metric = "latency"
                plot_procedures.plot_ts(ts_preprocessed, out_path,
                                        title="median filtered")
Ejemplo n.º 6
0
def plot_clients_per_zero_indegree_vertex_distribution(dt_start, dt_end):
    """Plot a histogram of client counts per zero-indegree vertex.

    For every server directory under ``prints/<str_dt>/filtered/graph``,
    the first traceroute type accepted by
    ``spatial_time_correlation.valid_graph`` is used. For each vertex of
    that graph with indegree 0, the number of rows of its
    ``cps_per_mac.csv`` is recorded. The total is printed and the
    distribution is saved to
    ``plots/cnt_clients_zero_indegree_vertex_distribution.png``.

    If no zero-indegree vertex is found, the total (0) is still printed
    but no figure is produced.

    Args:
        dt_start: start of the analysed period.
        dt_end: end of the analysed period.
    """
    cnt_clients_zero_indegree_vertex = []

    str_dt = utils.get_str_dt(dt_start, dt_end)
    for server in os.listdir("{}/prints/{}/filtered/graph".format(
            script_dir, str_dt)):
        for traceroute_type in unsupervised_utils.iter_traceroute_types():
            if not spatial_time_correlation.valid_graph(
                    dt_start, dt_end, server, traceroute_type):
                continue

            g = spatial_time_correlation.read_graph(
                dt_start, dt_end, server, traceroute_type)
            u_indegree = spatial_time_correlation.get_indegree(g)

            for u in g:
                if u_indegree[u] == 0:
                    in_path = ("{}/plots/names/{}/latency/{}/{}/{}/"
                               "cps_per_mac.csv".format(
                                   script_dir, str_dt, traceroute_type,
                                   server, u))
                    df = pd.read_csv(in_path)
                    cnt_clients_zero_indegree_vertex.append(df.shape[0])
            # Only the first valid traceroute type is used per server.
            break

    # Call form prints the same single value on Python 2 and Python 3.
    print(sum(cnt_clients_zero_indegree_vertex))

    if not cnt_clients_zero_indegree_vertex:
        # max() below would raise ValueError on an empty sequence and
        # there is nothing to draw anyway.
        return

    out_path = ("{}/plots/cnt_clients_zero_indegree_vertex_distribution.png".
                format(script_dir))
    plt.clf()
    matplotlib.rcParams.update({"font.size": 27})
    plt.gcf().set_size_inches(16, 11)
    # One unit-width bin per possible client count; materialised as a
    # list so slicing/plotting behaves the same on Python 2 and 3.
    bins = list(range(1, max(cnt_clients_zero_indegree_vertex) + 2))
    weights = (np.asarray([1.0] * len(cnt_clients_zero_indegree_vertex)) /
               len(cnt_clients_zero_indegree_vertex))
    plt.ylabel("frequency")
    plt.xlabel("number of clients in a zero indegree user-group")
    plt.xticks(bins[:-1], rotation=45)
    # NOTE(review): `normed` was replaced by `density` in newer matplotlib
    # releases; kept as-is because the installed version is not visible
    # from here - confirm before upgrading.
    plt.hist(cnt_clients_zero_indegree_vertex,
             bins=bins,
             normed=True,
             weights=weights)
    plt.savefig(out_path)
def localize_events(dt_start, dt_end, metric, eps_hours,
                    min_fraction_of_clients):
    """Run the per-server event localisation and aggregate the results.

    The distinct servers are read from the batch's
    ``filtered/traceroute_per_mac.csv``. For each server, the first
    traceroute type accepted by ``valid_graph`` is used to:

    1. analyse every zero-indegree vertex of the graph with
       ``analyse_first_hop`` (third argument ``True``),
    2. correlate the zero-indegree vertexes,
    3. analyse every first hop whose indegree is not zero with
       ``analyse_first_hop`` (third argument ``False``),
    4. aggregate the non-zero-indegree first hops for that server.

    Finally the per-server outputs are merged across all servers.

    Args:
        dt_start: start of the analysed period.
        dt_end: end of the analysed period.
        metric: metric name forwarded to the analysis helpers.
        eps_hours: forwarded unchanged to ``analyse_first_hop`` and
            ``correlate_zero_indegree_vertexes``.
        min_fraction_of_clients: forwarded unchanged to
            ``analyse_first_hop``.
    """
    str_dt = utils.get_str_dt(dt_start, dt_end)

    in_path = "{}/prints/{}/filtered/traceroute_per_mac.csv".format(script_dir,
                                                                    str_dt)
    servers = np.unique(pd.read_csv(in_path)["server"].values)
    for server in servers:
        for traceroute_type in unsupervised_utils.iter_traceroute_types():
            if not valid_graph(dt_start, dt_end, server, traceroute_type):
                continue

            g = read_graph(dt_start, dt_end, server, traceroute_type)
            u_indegree = get_indegree(g)

            for u in g:
                if u_indegree[u] == 0:
                    analyse_first_hop(g, u, True, metric, server, dt_start,
                                      dt_end, traceroute_type, eps_hours,
                                      min_fraction_of_clients)

            correlate_zero_indegree_vertexes(g, u_indegree, server,
                                             dt_start, dt_end, metric,
                                             traceroute_type, eps_hours)

            first_hops = get_first_hops(dt_start, dt_end, server,
                                        traceroute_type)
            for first_hop in first_hops:
                if u_indegree[first_hop] != 0:
                    # Analyse the first hop itself. The previous code
                    # passed the leftover variable of the `for u in g`
                    # loop above, i.e. always the last vertex of g.
                    analyse_first_hop(g, first_hop, False, metric, server,
                                      dt_start, dt_end, traceroute_type,
                                      eps_hours, min_fraction_of_clients)
            aggregate_first_hop_not_zero_indegree_vertex(first_hops, g,
                                                         metric, server,
                                                         dt_start, dt_end,
                                                         traceroute_type)

            # Only the first valid traceroute type is used per server.
            break

    aggregate_servers_correlations(dt_start, dt_end, metric, servers)
    aggregate_servers_first_hop_not_zero_indegree_vertex(dt_start, dt_end,
                                                         metric, servers)
Ejemplo n.º 8
0
def plot_per_name(dt_start, dt_end, metric, preprocess_args, plot_cps=True):
    """Plot each valid client's time series under every hop name it uses.

    For every traceroute type, rows of the batch's
    ``filtered/traceroute_per_mac.csv`` with truthy ``valid_cnt_samples``
    and valid-traceroute field are processed. For each name yielded by
    ``cp_utils.iter_names_traceroute_filtered`` a PNG is written to
    ``plots/names/<str_dt>/<metric>/<type>/<server>/<name>/``. A client is
    plotted only once; subsequent PNGs for the same client are file copies
    of the first one.

    Args:
        dt_start: start of the analysed period.
        dt_end: end of the analysed period.
        metric: metric to load and plot; also a directory component.
        preprocess_args: forwarded to ``cp_utils.preprocess``.
        plot_cps: forwarded to ``unsupervised_utils.get_client_cps``.
    """
    dt_dir = utils.get_dt_dir(dt_start, dt_end)
    str_dt = utils.get_str_dt(dt_start, dt_end)

    metric_dir = "{}/plots/names/{}/{}".format(script_dir, str_dt, metric)
    utils.create_dirs([
        "{}/plots/".format(script_dir),
        "{}/plots/names".format(script_dir),
        "{}/plots/names/{}".format(script_dir, str_dt),
        metric_dir,
    ])

    client_cps = unsupervised_utils.get_client_cps(plot_cps, str_dt, metric)

    # Path of the first plot produced for each client, so later
    # occurrences can be copied instead of plotted again.
    first_plot_of_client = {}

    csv_path = "{}/prints/{}/filtered/traceroute_per_mac.csv".format(
        script_dir, str_dt)

    for traceroute_type in unsupervised_utils.iter_traceroute_types():
        valid_traceroute_field, traceroute_field = \
            cp_utils.get_traceroute_fields(traceroute_type)

        type_dir = "{}/{}".format(metric_dir, traceroute_type)
        utils.create_dirs([type_dir])

        handled_rows = 0
        for _, row in pd.read_csv(csv_path).iterrows():
            if not (row["valid_cnt_samples"]
                    and row[valid_traceroute_field]):
                continue

            print("cnt={}, traceroute_type={}, str_dt={}".format(
                handled_rows, traceroute_type, str_dt))
            handled_rows += 1

            client = utils.get_client(row["server"], row["mac"])
            server_dir = "{}/{}".format(type_dir, row["server"])

            hop_names = cp_utils.iter_names_traceroute_filtered(
                ast.literal_eval(row[traceroute_field]))
            for name in hop_names:
                name_dir = "{}/{}".format(server_dir, name)
                utils.create_dirs([server_dir, name_dir])

                out_file_name = utils.get_out_file_name(
                    row["server"], row["mac"], dt_start, dt_end)
                out_path = "{}/{}.png".format(name_dir, out_file_name)

                if client in first_plot_of_client:
                    shutil.copyfile(first_plot_of_client[client], out_path)
                    continue

                first_plot_of_client[client] = out_path
                cp_dts = client_cps[client]

                in_path = "{}/input/{}/{}/{}.csv".format(
                    base_dir, dt_dir, row["server"], row["mac"])

                ts = TimeSeries(in_path, metric, dt_start, dt_end)
                cp_utils.preprocess(ts, preprocess_args)
                plot_procedures.plot_ts(ts,
                                        out_path,
                                        dt_axvline=cp_dts,
                                        title="median filtered")