def aggregate_servers_correlations(dt_start, dt_end, metric, servers):
    """Merge per-server zero-indegree correlation CSVs into one file.

    Reads each server's problem_location_zero_indegree_vertexes_correlation
    csv (only the first traceroute type with a valid graph is used per
    server), concatenates the rows into a single csv under
    prints/<str_dt>/filtered/<metric>/, and sorts it by vertex count
    (descending) then server (ascending).
    """
    str_dt = utils.get_str_dt(dt_start, dt_end)
    out_path = ("{}/prints/{}/filtered/{}/"
                "problem_location_zero_indegree_vertexes_correlation.csv".
                format(script_dir, str_dt, metric))
    line_fmt = "{},{},{},{},{},{},\"{}\",\"{}\"\n"
    with open(out_path, "w") as out_file:
        out_file.write("server,traceroute_type,cp_dt_start,cp_dt_end,cp_type,"
                       "cnt_vertexes_with_zero_indegree,suffix_match,"
                       "vertexes_with_zero_indegree\n")
        for srv in servers:
            for traceroute_type in unsupervised_utils.iter_traceroute_types():
                if not valid_graph(dt_start, dt_end, srv, traceroute_type):
                    continue
                in_path = ("{}/plots/names/{}/{}/{}/{}/"
                           "problem_location_zero_indegree_vertexes_"
                           "correlation.csv".
                           format(script_dir, str_dt, metric, traceroute_type,
                                  srv))
                for _, row in pd.read_csv(in_path).iterrows():
                    out_file.write(line_fmt.format(
                        srv,
                        row["traceroute_type"],
                        row["cp_dt_start"],
                        row["cp_dt_end"],
                        row["cp_type"],
                        row["cnt_vertexes_with_zero_indegree"],
                        row["suffix_match"],
                        row["vertexes_with_zero_indegree"]))
                # only the first valid traceroute type counts for a server
                break
    utils.sort_csv_file(out_path,
                        ["cnt_vertexes_with_zero_indegree", "server"],
                        ascending=[False, True])
def aggregate_servers_first_hop_not_zero_indegree_vertex(dt_start, dt_end,
                                                         metric, servers):
    """Merge per-server first-hop (non-zero-indegree) localization CSVs.

    For each server, uses only the first traceroute type with a valid
    graph and appends that server's rows to a single aggregated csv under
    prints/<str_dt>/filtered/<metric>/.
    """
    str_dt = utils.get_str_dt(dt_start, dt_end)
    out_path = ("{}/prints/{}/filtered/{}/"
                "problem_location_first_hop_not_zero_indegree_vertex.csv".
                format(script_dir, str_dt, metric))
    line_fmt = "{},{},{},{},{},{},\"{}\",\"{}\"\n"
    with open(out_path, "w") as out_file:
        out_file.write("server,cp_dt_start,cp_dt_end,cp_type,"
                       "fraction_of_clients,"
                       "cnt_clients,clients,problem_location\n")
        for srv in servers:
            for traceroute_type in unsupervised_utils.iter_traceroute_types():
                if not valid_graph(dt_start, dt_end, srv, traceroute_type):
                    continue
                in_path = ("{}/plots/paths/{}/{}/{}/{}/"
                           "problem_location_first_hop_not_zero_indegree_"
                           "vertex.csv".
                           format(script_dir, str_dt, metric, traceroute_type,
                                  srv))
                for _, row in pd.read_csv(in_path).iterrows():
                    out_file.write(line_fmt.format(
                        srv,
                        row["cp_dt_start"],
                        row["cp_dt_end"],
                        row["cp_type"],
                        row["fraction_of_clients"],
                        row["cnt_clients"],
                        row["clients"],
                        row["problem_location"]))
                # only the first valid traceroute type counts for a server
                break
def basic_statistics_per_batch():
    """Write per-batch counts of servers, valid clients, and clients.

    Walks every batch directory under prints/, reads its filtered
    traceroute_per_mac.csv, and records: the number of distinct servers
    with at least one valid client, the number of clients that have valid
    samples and at least one valid traceroute, and the total client count.
    """
    out_path = "{}/prints/basic_statistics_per_batch.csv".format(script_dir)
    with open(out_path, "w") as f:
        f.write("str_dt,cnt_servers,cnt_valid_clients,cnt_clients\n")
        for str_dt in os.listdir("{}/prints".format(script_dir)):
            if not os.path.isdir("{}/prints/{}".format(script_dir, str_dt)):
                continue
            servers = set()
            cnt_clients = 0
            cnt_valid_clients = 0
            in_path = (
                "{}/prints/{}/filtered/traceroute_per_mac.csv".format(
                    script_dir, str_dt))
            for _, row in pd.read_csv(in_path).iterrows():
                cnt_clients += 1
                for traceroute_type in \
                        unsupervised_utils.iter_traceroute_types():
                    valid_traceroute_field, _ = \
                        cp_utils.get_traceroute_fields(traceroute_type)
                    # a client is valid if any traceroute type is usable
                    if (row["valid_cnt_samples"] and
                            row[valid_traceroute_field]):
                        servers.add(row["server"])
                        cnt_valid_clients += 1
                        break
            f.write("{},{},{},{}\n".format(str_dt, len(servers),
                                           cnt_valid_clients, cnt_clients))
def process_graphs(dt_start, dt_end):
    """Build, persist, and check traceroute graphs for every server.

    For each (traceroute_type, server) pair of the batch, reconstructs
    the name-neighbor graph from the filtered traceroute data, writes it
    as a Graphviz .gv file under prints/<str_dt>/filtered/graph/<server>/,
    and runs check_graph on the result.
    """
    str_dt = utils.get_str_dt(dt_start, dt_end)
    out_dir = "{}/prints/{}/filtered/graph/".format(script_dir, str_dt)
    utils.create_dirs([out_dir])
    in_path = "{}/prints/{}/filtered/traceroute_per_mac.csv".format(
        script_dir, str_dt)
    # unique servers present in this batch's traceroute csv
    servers = np.unique(pd.read_csv(in_path)["server"].values)
    for traceroute_type in unsupervised_utils.iter_traceroute_types():
        valid_traceroute_field, traceroute_field = \
            cp_utils.get_traceroute_fields(traceroute_type)
        for server in servers:
            # parent dir listed first so the per-server dir can be created
            utils.create_dirs([
                "{}/prints/{}/filtered/graph/".format(script_dir, str_dt),
                "{}/prints/{}/filtered/graph/{}".format(
                    script_dir, str_dt, server)
            ])
            out_dir = "{}/prints/{}/filtered/graph/{}".format(
                script_dir, str_dt, server)
            out_path = "{}/{}_graph.gv".format(out_dir, traceroute_field)
            name_neigh = get_graph(dt_start, dt_end, valid_traceroute_field,
                                   traceroute_field, server)
            write_graph(out_path, name_neigh)
            check_graph(out_dir, name_neigh, traceroute_field)
def plot_latencies_traceroute(dt_start, dt_end, preprocess_args):
    """Plot per-hop latency time series along each client's traceroute.

    For every valid client row, loads its traceroute time series, and for
    each traceroute type with a valid path plots one median-filtered
    latency series per hop name under
    plots/paths/<str_dt>/latency/<traceroute_type>/<server>/<path>/.

    preprocess_args: arguments forwarded to cp_utils.preprocess
        (presumably median-filter settings — confirm against caller).
    """
    str_dt = utils.get_str_dt(dt_start, dt_end)
    in_path = "{}/prints/{}/filtered/traceroute_per_mac.csv".format(script_dir,
                                                                    str_dt)
    df = pd.read_csv(in_path)
    for _, row, in df.iterrows():
        if row["valid_cnt_samples"]:
            in_path = utils.get_in_path(row["server"], row["mac"], dt_start,
                                        dt_end)
            ts_traceroute = TimeSeries(in_path=in_path, metric="traceroute",
                                       dt_start=dt_start, dt_end=dt_end)
            for traceroute_type in unsupervised_utils.iter_traceroute_types():
                valid_traceroute_field, traceroute_field = \
                    cp_utils.get_traceroute_fields(traceroute_type)
                if row[valid_traceroute_field]:
                    # traceroute column stores a python literal (list)
                    traceroute = ast.literal_eval(row[traceroute_field])
                    name_ts = get_ts_per_name(traceroute_type, ts_traceroute,
                                              dt_start, dt_end)
                    dir_path = ("{}/plots/paths/{}/{}/{}/{}".
                                format(script_dir, str_dt, "latency",
                                       traceroute_type, row["server"]))
                    # path dir goes from server towards the client
                    traceroute_path = "/".join(map(str,
                                                   list(reversed(traceroute))))
                    dir_path = "{}/{}".format(dir_path, traceroute_path)
                    utils.create_dirs(["{}/traceroute_latencies/".
                                       format(dir_path),
                                       "{}/traceroute_latencies/{}".
                                       format(dir_path, row["mac"])])
                    # last hop is skipped (no latency series for it)
                    for i in range(len(traceroute) - 1):
                        name = traceroute[i][0][0]
                        traceroute_path = "hop{}_{}".format(str(i).zfill(2),
                                                            name)
                        out_path = ("{}/traceroute_latencies/{}/{}.png".
                                    format(dir_path, row["mac"],
                                           traceroute_path))
                        # copy before preprocessing so the raw series in
                        # name_ts stays untouched
                        ts_preprocessed = name_ts[name].copy()
                        cp_utils.preprocess(ts_preprocessed, preprocess_args)
                        # plot_procedures.plot_ts_share_x(
                        #     name_ts[name],
                        #     ts_preprocessed,
                        #     out_path,
                        #     plot_type2="scatter",
                        #     title1="raw",
                        #     title2="median filtered",
                        #     default_ylabel=True)
                        ts_preprocessed.metric = "latency"
                        plot_procedures.plot_ts(ts_preprocessed, out_path,
                                                title="median filtered")
def plot_clients_per_zero_indegree_vertex_distribution(dt_start, dt_end):
    """Plot the distribution of client counts per zero-indegree vertex.

    Scans every server graph of the batch (using only the first traceroute
    type with a valid graph per server), counts the clients (rows of
    cps_per_mac.csv) attached to each vertex with indegree zero, and saves
    a frequency histogram of those counts.
    """
    cnt_clients_zero_indegree_vertex = []
    str_dt = utils.get_str_dt(dt_start, dt_end)
    for server in os.listdir("{}/prints/{}/filtered/graph".format(
            script_dir, str_dt)):
        for traceroute_type in unsupervised_utils.iter_traceroute_types():
            if spatial_time_correlation.valid_graph(dt_start, dt_end, server,
                                                    traceroute_type):
                g = spatial_time_correlation.read_graph(
                    dt_start, dt_end, server, traceroute_type)
                u_indegree = spatial_time_correlation.get_indegree(g)
                for u in g:
                    if u_indegree[u] == 0:
                        in_path = ("{}/plots/names/{}/latency/{}/{}/{}/"
                                   "cps_per_mac.csv".format(
                                       script_dir, str_dt, traceroute_type,
                                       server, u))
                        df = pd.read_csv(in_path)
                        # one row per client mac below this vertex
                        cnt_clients_zero_indegree_vertex.append(df.shape[0])
                # only the first valid traceroute type counts for a server
                break

    # guard: max() below raises ValueError on an empty sequence
    if not cnt_clients_zero_indegree_vertex:
        return

    # print() call form works in both python 2 and python 3
    print(sum(cnt_clients_zero_indegree_vertex))

    out_path = ("{}/plots/cnt_clients_zero_indegree_vertex_distribution.png".
                format(script_dir))
    plt.clf()
    matplotlib.rcParams.update({"font.size": 27})
    plt.gcf().set_size_inches(16, 11)
    bins = range(1, max(cnt_clients_zero_indegree_vertex) + 2)
    # 1/N weights make the bar heights sum to 1 (a frequency histogram).
    # normed=True was dropped: it re-normalized on top of the weights
    # (double normalization) and was removed in matplotlib >= 3.1.
    weights = (np.asarray([1.0] * len(cnt_clients_zero_indegree_vertex)) /
               len(cnt_clients_zero_indegree_vertex))
    plt.ylabel("frequency")
    plt.xlabel("number of clients in a zero indegree user-group")
    plt.xticks(bins[:-1], rotation=45)
    plt.hist(cnt_clients_zero_indegree_vertex, bins=bins, weights=weights)
    plt.savefig(out_path)
def localize_events(dt_start, dt_end, metric, eps_hours,
                    min_fraction_of_clients):
    """Localize problems on each server's traceroute graph and aggregate.

    For every server (using only the first traceroute type with a valid
    graph): analyses each zero-indegree vertex as a first hop and
    correlates zero-indegree vertexes; then analyses every first hop with
    non-zero indegree; finally aggregates the per-server results into the
    batch-level csvs.
    """
    str_dt = utils.get_str_dt(dt_start, dt_end)
    in_path = "{}/prints/{}/filtered/traceroute_per_mac.csv".format(script_dir,
                                                                    str_dt)
    servers = np.unique(pd.read_csv(in_path)["server"].values)
    for server in servers:
        for traceroute_type in unsupervised_utils.iter_traceroute_types():
            if valid_graph(dt_start, dt_end, server, traceroute_type):
                g = read_graph(dt_start, dt_end, server, traceroute_type)
                u_indegree = get_indegree(g)
                for u in g:
                    if u_indegree[u] == 0:
                        analyse_first_hop(g, u, True, metric, server,
                                          dt_start, dt_end, traceroute_type,
                                          eps_hours, min_fraction_of_clients)
                        correlate_zero_indegree_vertexes(g, u_indegree, server,
                                                         dt_start, dt_end,
                                                         metric,
                                                         traceroute_type,
                                                         eps_hours)
                first_hops = get_first_hops(dt_start, dt_end, server,
                                            traceroute_type)
                for first_hop in first_hops:
                    if u_indegree[first_hop] != 0:
                        # BUGFIX: previously passed the stale loop variable
                        # u (last vertex of the loop above) instead of the
                        # first_hop actually being tested here.
                        analyse_first_hop(g, first_hop, False, metric, server,
                                          dt_start, dt_end, traceroute_type,
                                          eps_hours, min_fraction_of_clients)
                aggregate_first_hop_not_zero_indegree_vertex(first_hops, g,
                                                             metric, server,
                                                             dt_start, dt_end,
                                                             traceroute_type)
                # only the first valid traceroute type counts for a server
                break
    aggregate_servers_correlations(dt_start, dt_end, metric, servers)
    aggregate_servers_first_hop_not_zero_indegree_vertex(dt_start, dt_end,
                                                         metric, servers)
def plot_per_name(dt_start, dt_end, metric, preprocess_args, plot_cps=True):
    """Plot the per-name (hop) time series for every valid client.

    For each traceroute type and each client with valid samples and a
    valid traceroute, plots the client's median-filtered series once per
    hop name under plots/names/<str_dt>/<metric>/<traceroute_type>/.
    A client's plot is rendered only once; subsequent hop directories get
    a file copy of the first rendering.

    plot_cps: when True, change points are drawn as vertical lines
        (via unsupervised_utils.get_client_cps).
    """
    dt_dir = utils.get_dt_dir(dt_start, dt_end)
    str_dt = utils.get_str_dt(dt_start, dt_end)
    utils.create_dirs([
        "{}/plots/".format(script_dir),
        "{}/plots/names".format(script_dir),
        "{}/plots/names/{}".format(script_dir, str_dt),
        "{}/plots/names/{}/{}".format(script_dir, str_dt, metric)
    ])
    client_cps = unsupervised_utils.get_client_cps(plot_cps, str_dt, metric)

    # avoid reploting: maps client -> path of its already-rendered plot
    client_plotPath = {}
    for traceroute_type in unsupervised_utils.iter_traceroute_types():
        valid_traceroute_field, traceroute_field = \
            cp_utils.get_traceroute_fields(traceroute_type)
        utils.create_dirs([
            "{}/plots/names/{}/{}/{}".format(script_dir, str_dt, metric,
                                             traceroute_type)
        ])
        df = pd.read_csv("{}/prints/{}/filtered/traceroute_per_mac.csv".format(
            script_dir, str_dt))
        cnt = 0
        for idx, row in df.iterrows():
            if row["valid_cnt_samples"] and row[valid_traceroute_field]:
                # progress log: one line per plotted client
                print("cnt={}, traceroute_type={}, str_dt={}".format(
                    cnt, traceroute_type, str_dt))
                cnt += 1

                client = utils.get_client(row["server"], row["mac"])

                for name in cp_utils.iter_names_traceroute_filtered(
                        ast.literal_eval(row[traceroute_field])):
                    utils.create_dirs([
                        "{}/plots/names/{}/{}/{}/{}".format(
                            script_dir, str_dt, metric, traceroute_type,
                            row["server"]),
                        "{}/plots/names/{}/{}/{}/{}/{}".format(
                            script_dir, str_dt, metric, traceroute_type,
                            row["server"], name)
                    ])
                    out_file_name = utils.get_out_file_name(
                        row["server"], row["mac"], dt_start, dt_end)
                    out_path = ("{}/plots/names/{}/{}/{}/{}/{}/{}.png".format(
                        script_dir, str_dt, metric, traceroute_type,
                        row["server"], name, out_file_name))

                    # avoid reploting: copy the first rendering instead of
                    # reading and preprocessing the series again
                    if client in client_plotPath:
                        shutil.copyfile(client_plotPath[client], out_path)
                    else:
                        client_plotPath[client] = out_path

                        cp_dts = client_cps[client]

                        in_path = "{}/input/{}/{}/{}.csv".format(
                            base_dir, dt_dir, row["server"], row["mac"])
                        ts = TimeSeries(in_path, metric, dt_start, dt_end)
                        cp_utils.preprocess(ts, preprocess_args)
                        plot_procedures.plot_ts(ts,
                                                out_path,
                                                dt_axvline=cp_dts,
                                                title="median filtered")