def test_performance():
    """Benchmark WilsonSampler on a 1000-vertex random connected graph.

    Builds the graph once, draws `num_samples` spanning trees, and prints
    the total elapsed wall-clock time in nanoseconds.
    """
    num_samples = 100
    g = Graph(get_random_connected_graph(1000, 0.1, 0))
    sampler = WilsonSampler(g)
    start = time.time_ns()
    for _ in range(num_samples):
        sampler.sample()
    end = time.time_ns()
    elapsed = end - start
    # Fix: the message previously said "1000 samples" while the loop ran 100.
    print(f"WilsonSampler took {elapsed} ns for {num_samples} samples")
def run_compare(n, density, seed, max_degree):
    """Generate one random connected graph and run both the exact (MTT)
    and the approximate spanning-tree counters on it."""
    print(
        f"Running with n: {n}, density: {density}, seed: {seed}, max_degree: {max_degree}"
    )
    graph = get_random_connected_graph(n, density, seed=seed, max_degree=max_degree)
    count_mtt(graph)
    count_approx(graph)
def unit_test_1():
    """Smoke test: approximate the spanning-tree count of a small random graph
    and print the estimate."""
    n = 10
    p = 0.3
    seed = 1  # NOTE(review): currently unused — generator is called without it (todo: add seed)
    graph = random_graphs.get_random_connected_graph(n, p)
    # The sampler starts empty; it is (re)initialized inside the counting routine.
    sampler = st_sampler.STSampler({})
    num_samples = 100
    use_log = False
    estimate = approx_count_st_testing_ver(graph, sampler, num_samples, use_log)
    print(estimate)
def make_row(n, density, num_samples):
    """Build one results row for a random (n, density) graph.

    Returns [n, density, log(actual), log(estimate), multiplicative error,
    estimation time in seconds].
    """
    graph = random_graphs.get_random_connected_graph(n, density)
    started = time.time()
    estimate = approx_count_log(graph, num_samples)
    finished = time.time()
    actual = mtt.MTT(graph, log=True)
    err = mult_error_log(float(estimate), actual)
    print(f'actual = {actual}, est = {estimate}, error = {err}')
    return [n, density, actual, float(estimate), err, finished - started]
def unit_test_2():
    """Exercise the generic approximate counter with a per-step sampling
    schedule and print the relative error against the exact MTT count."""
    n = 30
    p = 0.3
    seed = 1  # NOTE(review): unused — generator is called without it
    g = random_graphs.get_random_connected_graph(n, p)  # todo: add seed
    sampler = st_sampler.STSampler(
        {})  # empty sampler, it will be initialized within fn later
    # Alternative strategies; only the *_fn_2 pair is exercised below.
    num_edges_each_time_fn_1 = lambda x, y: 1  # one edge each time
    num_samples_fn_1 = lambda x, y: 200  # 200 samples each time
    num_edges_each_time_fn_2 = lambda x, y: 3  # 3 edges each time
    num_samples_fn_2 = lambda x, y: 100 + 30 * y  # 100 samples as base, plus 30 more per unit of y
    use_log = False
    nst = approx_count_st_generic(g, sampler, num_samples_fn_2,
                                  num_edges_each_time_fn_2, use_log)
    actual = mtt.MTT(g)
    # Relative error of the approximation.
    print('error =', abs(nst - actual) / actual)
def test_accuracy(n, density, seed, max_degree):
    """Measure how well a local-neighborhood estimate of an edge's
    spanning-tree probability approximates the exact value.

    Picks the first edge (u, v) of a random connected graph, computes the
    exact log-probability p that a uniform spanning tree contains it, then
    re-estimates p from growing neighborhoods (explore_factor 1..9) and
    prints the error and timing for each depth.
    """
    print(
        f"Running with n: {n}, density: {density}, seed: {seed}, max_degree: {max_degree}"
    )
    adj_list = get_random_connected_graph(n, density, seed, max_degree=max_degree)
    g = Graph(adj_list)
    g_edges = g.get_all_edge_indcies()
    edge_idx = g_edges[0]
    u, v = g.get_edge(edge_idx)
    # test_neighborhood(g, edge_idx)
    # calculate the actual p (log-space: log P(e in tree) = log ST(G/e) - log ST(G))
    total_st = MTT(adj_list, use_log=True)
    # use a copy of the graph since we need the unmodified graph later
    g_copy = Graph(adj_list)
    g_copy.contract(g_copy.get_edge_between_vertices(u, v))
    with_e_st = MTT(g_copy.to_adj_list(), use_log=True)
    p = with_e_st - total_st
    explore_factor = 1
    while explore_factor < 10:
        start = time.time_ns()
        # Estimate the same log-probability using only the local neighborhood.
        neighborhood_edges = get_neighborhood(g, edge_idx, explore_factor)
        neighborhood = edges_to_adj_list(g, neighborhood_edges)
        neighborhood_st = MTT(neighborhood, use_log=True)
        neighborhood_g = Graph(neighborhood)
        neighborhood_edge_idx = neighborhood_g.get_edge_between_vertices(u, v)
        neighborhood_g.contract(neighborhood_edge_idx)
        neighborhood_with_e = neighborhood_g.to_adj_list()
        neighborhood_with_e_st = MTT(neighborhood_with_e, use_log=True)
        end = time.time_ns()
        elapsed_ms = (end - start) / (10**6)
        p_est = neighborhood_with_e_st - neighborhood_st
        # Multiplicative error between exact and estimated probabilities.
        error = abs(1 - exp(p - p_est))
        neighborhood_edge_size = len(neighborhood_edges)
        neighborhood_vertex_size = len(neighborhood)
        print(
            f"with depth {explore_factor} achieved error {error:.16f} ({elapsed_ms} ms) (neighborhood vertices: {neighborhood_vertex_size}) (neighborhood edges: {neighborhood_edge_size})"
        )
        explore_factor += 1
def test_neighborhood_size():
    """For growing graph sizes, find the smallest neighborhood explore depth
    at which the local estimate of an edge's spanning-tree probability is
    within `threshold` of the exact value, and report depth/time/size.
    """
    for n in [100, 500, 1000, 5000, 10000]:
        adj_list = get_random_connected_graph(n, 0.1, seed=0, max_degree=5)
        g = Graph(adj_list)
        g_edges = g.get_all_edge_indcies()
        edge_idx = g_edges[0]
        u, v = g.get_edge(edge_idx)
        # calculate the actual p (log-space: log P(e in tree) = log ST(G/e) - log ST(G))
        total_st = MTT(adj_list, use_log=True)
        # use a copy of the graph since we need the unmodified graph later
        g_copy = Graph(adj_list)
        g_copy.contract(g_copy.get_edge_between_vertices(u, v))
        with_e_st = MTT(g_copy.to_adj_list(), use_log=True)
        p = with_e_st - total_st
        explore_factor = 1
        threshold = 0.001
        while True:
            start = time.time_ns()
            # Estimate the same log-probability using only the local neighborhood.
            neighborhood_edges = get_neighborhood(g, edge_idx, explore_factor)
            neighborhood = edges_to_adj_list(g, neighborhood_edges)
            neighborhood_st = MTT(neighborhood, use_log=True)
            neighborhood_g = Graph(neighborhood)
            neighborhood_edge_idx = neighborhood_g.get_edge_between_vertices(
                u, v)
            neighborhood_g.contract(neighborhood_edge_idx)
            neighborhood_with_e = neighborhood_g.to_adj_list()
            neighborhood_with_e_st = MTT(neighborhood_with_e, use_log=True)
            end = time.time_ns()
            elapsed_ms = (end - start) / (10**6)
            p_est = neighborhood_with_e_st - neighborhood_st
            error = abs(1 - exp(p - p_est))
            neighborhood_vertex_size = len(neighborhood)
            if error < threshold:
                # Fix: corrected "neighberhood" typo in the report message.
                print(
                    f"for n = {n}, hit threshold of {threshold} with explore depth {explore_factor} ({elapsed_ms} ms) (neighborhood size: {neighborhood_vertex_size})"
                )
                break
            explore_factor += 1
def unit_test():
    """Check that STSampler draws spanning trees from a small random graph
    with the expected (uniform) distribution."""
    graph = random_graphs.get_random_connected_graph(8, 0.3)
    sampler = st_sampler.STSampler(graph)
    test_for_uniform_dist(graph, sampler)
def test():
    """Compare the iterative approximation against the exact spanning-tree
    count and print the percentage error."""
    graph = random_graphs.get_random_connected_graph(20, 0.6)
    exact = mtt.MTT(graph)
    approx = approx_count_iter(graph, 300, 300)
    pct_error = abs(approx - exact) / exact * 100
    print(f'Final: actual = {exact}, estimated = {approx}, error = {pct_error}')
# densities_list = [0.3, 0.5, 0.7]
# graph_number_list = list(range(1, 6))

# Make sure the output directory exists before writing any graph files.
try:
    os.mkdir(subdir)
    print("Directory ", subdir, " Created ")
except FileExistsError:
    print("Directory ", subdir, " already exists")

# For every (size, density) combination, generate several graph pairs and
# persist each with its spanning-tree counts.
for num_vertices in num_vertices_list:
    for density in densities_list:
        for graph_idx in graph_number_list:
            out_name = 'g{}_{}_{}.json'.format(graph_idx, num_vertices,
                                               int(100 * density))
            out_path = os.path.join(subdir, out_name)
            original = random_graphs.get_random_connected_graph(num_vertices,
                                                                density)
            u, v = get_random_edge(original)
            # Remove (u, v) from a deep copy; there is a small chance this
            # disconnects the graph.
            reduced = copy.deepcopy(original)
            reduced[u].remove(v)
            reduced[v].remove(u)
            count_original = mtt.MTT(original)
            count_reduced = mtt.MTT(reduced)
            record = [original, count_original, reduced, count_reduced, [u, v]]
            db.save_data(record, out_path)