def helper_clarans_plugin():
    # Signature kept for reference: (n, k, plug_in_oracle, oracle, num_local=None, max_neighbour=None)
    global g, g_mat, order_val, full_mat, count
    k = 5
    pr = clarans_vanila(oracle, order_val, k)
    print("ACTUAL: ", pr.centroids)

    count = 0
    start = time.time()
    obj_sw = SashaWang()
    obj_sw.store(g, order_val)
    oracle_plugin = obj_sw
    p = clarans_plugin(order_val, k, oracle_plugin, oracle)
    end = time.time()
    print("SW", p.centroids)
    print("COUNT Sasha Wang", count, end - start)

    start = time.time()
    count = 0
    obj = unified_graph_lb_ub()
    obj.store(g, order_val)
    oracle_plugin = obj
    p = clarans_plugin(order_val, k, oracle_plugin, oracle)
    end = time.time()
    print("LBT", p.centroids)
    print("COUNT LBTree enabled", count, end - start, "\n\n")

    start = time.time()
    count = 0
    obj = ParamTriSearch(2, obj_sw.ub_matrix)
    obj.store(g, order_val)
    oracle_plugin = obj
    p = clarans_plugin(order_val, k, oracle_plugin, oracle)
    end = time.time()
    print("PARA", p.centroids)
    print("PARA", count, end - start, "\n\n")
def helper_kruskals_plugin():
    global g, g_mat, order_val, full_mat, count

    count = 0
    start = time.time()
    obj_sw = SashaWang()
    obj_sw.store(g, order_val)
    oracle_plugin = obj_sw
    p = kruskals_plugin(order_val, oracle, oracle_plugin)
    p.mst()
    end = time.time()
    print('COUNT Sasha Wang', count, end - start, "\n\n")

    start = time.time()
    count = 0
    obj = unified_graph_lb_ub()
    obj.store(g, order_val)
    oracle_plugin = obj
    p = kruskals_plugin(order_val, oracle, oracle_plugin)
    p.mst()
    end = time.time()
    print("COUNT LBTree enabled", count, end - start, "\n\n")

    start = time.time()
    count = 0
    obj = ParamTriSearch(2, obj_sw.ub_matrix)
    obj.store(g, order_val)
    oracle_plugin = obj
    p = kruskals_plugin(order_val, oracle, oracle_plugin)
    p.mst()
    end = time.time()
    print("PARA", count, end - start, "\n\n")
def prims_SW(g, pr, measure, kind, order_val, timer):
    # Sasha Wang algorithm
    print("Experiment Starting Sasha Wang (Prims)\n")
    global full_mat, count
    # g = {}
    timer.start()
    obj_sw = SashaWang()
    obj_sw.store(g, order_val)
    oracle_plugin = obj_sw
    p = prims_plugin(order_val, oracle, oracle_plugin)
    p.mst(0)
    timer.end()
    assert abs(p.mst_path_length - pr.mst_path_length) < 0.000001
    print(
        "Plugin with Sasha Wang Experiments\nActual(SW) Prims Path Length: {}\nSasha Wang Prims Path Length: {}\n"
        "measure: {}, kind: {}, order_val: {}".format(
            p.mst_path_length, pr.mst_path_length, measure, kind, order_val))
    sasha_wang_results = "COUNT Sasha Wang " + str(count) + " Time " + str(
        timer.time_elapsed - obj_sw.update_time) + "\n"
    print("COUNT Sasha Wang: {}, Time(total): {}, Time(SP): {}\n\n".format(
        count, timer.time_elapsed, obj_sw.update_time))
def helper_sasha_wang_saver(id_graph_to_run, type_of_graph_id):
    global g, g_mat, order_val, full_mat, count
    graph = dict()
    graph[0] = [
        'normal_distances_Geometric_512.pkl',
        'normal_distances_Renyi Erdos_512.pkl',
        'normal_distances_ForrestFire_512.pkl',
        'normal_distances_Barabasi_512.pkl'
    ]
    graph[1] = [
        'uniform_distances_Geometric_512.pkl',
        'uniform_distances_Renyi Erdos_512.pkl',
        'uniform_distances_ForrestFire_512.pkl',
        'uniform_distances_Barabasi_512.pkl'
    ]
    graph[2] = [
        'zipf_distances_Geometric_512.pkl',
        'zipf_distances_Renyi Erdos_512.pkl',
        'zipf_distances_ForrestFire_512.pkl',
        'zipf_distances_Barabasi_512.pkl'
    ]
    outs = ['normal512.pkl', 'uniform512.pkl', 'zipf512.pkl']
    full_mat = pickle.load(
        open(os.path.join('igraph', outs[id_graph_to_run]), 'rb'))
    order_val = full_mat.shape[0]
    graph = graph[id_graph_to_run][type_of_graph_id]
    print(graph)
    ub_out_name = os.path.join(
        "LB_UB", '_'.join([
            'ub_sw',
            str(graph.split('_')[0]),
            graph.split('_')[2],
            graph.split('_')[3]
        ]))
    lb_out_name = os.path.join(
        "LB_UB", '_'.join([
            'lb_sw',
            str(graph.split('_')[0]),
            graph.split('_')[2],
            graph.split('_')[3]
        ]))
    g = pickle.load(open(os.path.join('igraph', graph), 'rb'))
    count = 0
    print("Graph Chosen; SW Starting.")
    start = time.time()
    obj_sw = SashaWang()
    obj_sw.store(g, order_val)
    end = time.time()
    print(
        "Time for SW for algorithm(", '_'.join([
            str(graph.split('_')[0]),
            graph.split('_')[2],
            graph.split('_')[3]
        ]), "): ", (end - start))
    pickle.dump(np.array(obj_sw.ub_matrix), open(ub_out_name, 'wb'))
    pickle.dump(np.array(obj_sw.lb_matrix), open(lb_out_name, 'wb'))
def helper_tester(order_val, measure, kind):
    global full_mat, count
    distance_measure = ['normal', 'uniform', 'zipf']
    generation_algorithms = [
        'Geometric', 'Renyi Erdos', 'ForrestFire', 'Barabasi'
    ]
    full_mat_name = distance_measure[measure] + str(order_val) + '.pkl'
    g_name = distance_measure[measure] + '_distances_' + generation_algorithms[
        kind] + '_' + str(order_val) + '.pkl'
    g = pickle.load(open(os.path.join("igraph", g_name), 'rb'))
    full_mat = pickle.load(open(os.path.join("igraph", full_mat_name), 'rb'))
    g_mat = _get_matrix_from_adj_dict(g, order_val)
    count = 0

    obj_sw = SashaWang()
    start = time.time()
    obj_sw.store(g, order_val)
    sw_time = time.time() - start

    obj = unified_graph_lb_ub()
    start = time.time()
    obj.store(g, order_val)
    our_time = time.time() - start

    start = time.time()
    obj_tri = ParamTriSearch(2, None)
    obj_tri.store(g, order_val)
    tri_time = time.time() - start

    average_lbt = np.average(
        np.array(obj_sw.lb_matrix) - np.array(obj.lb_matrix))
    mean_sw = np.average(np.array(obj_sw.lb_matrix))
    mean_ours = np.average(np.array(obj.lb_matrix))
    mean_tri = np.average(np.array(obj_tri.lb_matrix))
    average_original = np.average(full_mat - np.array(obj_sw.lb_matrix))
    results_file_name = "Error_" + str(order_val) + "_" \
        + generation_algorithms[kind] + "_" \
        + distance_measure[measure] + "_" + ".txt"
    lbt_results = " Time SW: " \
        + str(sw_time) + "\n" \
        + " Time lbub: " + str(our_time) + "\n" \
        + " Average Error with LBT " + str(average_lbt) + "\n" \
        + " Average Error with Original " + str(average_original) \
        + " Mean SW " + str(mean_sw) \
        + " Mean Ours " + str(mean_ours) \
        + " Mean TriSearch " + str(mean_tri) \
        + " Time Tri " + str(tri_time) \
        + "\n"
    print(lbt_results)
    f = open(os.path.join("quality", results_file_name), "w+")
    f.write(lbt_results)
    f.write("\n")
    f.close()
def prims_SW(g, pr, order_val, timer, oracle, bounder=False):
    # Sasha Wang algorithm
    # NOTE: this definition of prims_SW (explicit oracle argument plus optional
    # bounds dump) shadows the earlier one above when both live in this module.
    print("Experiment Starting Sasha Wang (Prims)\n")
    global full_mat, count
    # g = {}
    timer.start()
    obj_sw = SashaWang()
    obj_sw.store(g, order_val)
    oracle_plugin = obj_sw
    # oracle = flicker_oracle()
    p = prims_plugin(order_val, oracle, oracle_plugin)
    p.mst(0)
    timer.end()
    print("value from SW: {}".format(p.mst_path_length))
    assert abs(p.mst_path_length - pr.mst_path_length) < 0.000001
    print(
        "Plugin with Sasha Wang Experiments\nActual(SW) Prims Path Length: {}\nSasha Wang Prims Path Length: {}\n"
        "order_val: {}".format(p.mst_path_length, pr.mst_path_length,
                               order_val))
    sasha_wang_results = "COUNT Sasha Wang " + str(count) + " Time " + str(
        timer.time_elapsed - obj_sw.update_time) + "\n"
    print("COUNT Sasha Wang: {}, Time(total): {}, Time(SP): {}\n\n".format(
        count, timer.time_elapsed, obj_sw.update_time))

    if bounder:
        lb = []
        ub = []
        lb_name = 'lower_bounds_{}_sw.lb'.format(order_val)
        ub_name = 'upper_bounds_{}_sw.ub'.format(order_val)
        for i in range(order_val):
            for j in range(i + 1, order_val):
                a, b = p.plug_in_oracle.lookup(i, j)
                lb.append(a)
                ub.append(b)
        path_out = os.path.join(os.getcwd(), "bounds_compare_results",
                                "bounds_{}_sw".format(order_val))
        if not os.path.exists(path_out):
            os.makedirs(path_out)
        with open(os.path.join(path_out, lb_name), 'w') as f:
            f.write('\n'.join([str(b) for b in lb]))
        with open(os.path.join(path_out, ub_name), 'w') as f:
            f.write('\n'.join([str(b) for b in ub]))
        with open("dft_scale.res", 'a+') as f:
            f.write("Order Val: {}\n".format(order_val))
            f.write(
                "COUNT Sasha Wang: {}, Time(total): {}, Time(SP): {}\n\n".format(
                    count, timer.time_elapsed, obj_sw.update_time))
def clarans_SW(pr, measure, kind, order_val, timer, algo, k):
    print("Experiment Starting SW (CLARANS)\n")
    global full_mat, count
    g = {}
    timer.start()
    obj_sw = SashaWang()
    obj_sw.store(g, order_val)
    oracle_plugin = obj_sw
    p = clarans_plugin(order_val, k, oracle_plugin, oracle)
    print("Plug-in(CLA): ", p.centroids)
    timer.end()
    sasha_wang_results = "COUNT Sasha Wang " + str(count) + " Time " + str(
        timer.time_elapsed - obj_sw.update_time) + "\n"
    print("COUNT Sasha Wang: {}, Time(total): {}, Time(SP): {}\n\n".format(
        count, timer.time_elapsed, obj_sw.update_time))
def navarro_SW(pr, measure, kind, order_val, timer, algo, k):
    print("Experiment Starting SW (NAVARRO)\n")
    global full_mat, count
    g = {}
    timer.start()
    obj_sw = SashaWang()
    obj_sw.store(g, order_val)
    oracle_plugin = obj_sw
    p = knnrp(order_val, k, oracle, oracle_plugin)
    p.knn_queries()
    # print("Plug-in(Nav): ", p.NHA)
    timer.end()
    sasha_wang_results = "COUNT Sasha Wang " + str(count) + " Time " + str(
        timer.time_elapsed - obj_sw.update_time) + "\n"
    print("COUNT Sasha Wang: {}, Time(total): {}, Time(SP): {}\n\n".format(
        count, timer.time_elapsed, obj_sw.update_time))
def kruskals_SW(pr, measure, kind, order_val, timer, algo):
    print("Experiment Starting SW (Kruskals)\n")
    global full_mat, count
    g = {}
    timer.start()
    obj_sw = SashaWang()
    obj_sw.store(g, order_val)
    oracle_plugin = obj_sw
    p = kruskals_plugin(order_val, oracle, oracle_plugin)
    p.mst()
    timer.end()
    print(
        "(KRUSKAL)Sasha Wang - Original Length: {}, our length: {}, measure: {}, kind: {}, order_val: {}"
        .format(pr.mst_path_length, p.mst_path_length, measure, kind,
                order_val))
    assert abs(p.mst_path_length - pr.mst_path_length) < 0.000001
    sasha_wang_results = "COUNT Sasha Wang " + str(count) + " Time " + str(
        timer.time_elapsed - obj_sw.update_time) + "\n"
    print("COUNT Sasha Wang: {}, Time(total): {}, Time(SP): {}\n\n".format(
        count, timer.time_elapsed, obj_sw.update_time))
def helper_pam_plugin(k=5):
    global g, g_mat, order_val, full_mat, count
    centroids = sample(list(range(order_val)), k)
    copy_centroids = copy.copy(centroids)

    count = 0
    centroids = copy.copy(copy_centroids)
    start = time.time()
    obj_sw = SashaWang()
    obj_sw.store(g, order_val)
    oracle_plugin = obj_sw
    p = pam_plugin(oracle, oracle_plugin, order_val, k, centroids)
    end = time.time()
    print("SW", p.centroids)
    print("COUNT Sasha Wang", count, end - start)

    centroids = copy.copy(copy_centroids)
    start = time.time()
    count = 0
    obj = unified_graph_lb_ub()
    obj.store(g, order_val)
    oracle_plugin = obj
    p = pam_plugin(oracle, oracle_plugin, order_val, k, centroids)
    end = time.time()
    print("LBT", p.centroids)
    print("COUNT LBTree enabled", count, end - start, "\n\n")

    centroids = copy.copy(copy_centroids)
    start = time.time()
    count = 0
    obj = ParamTriSearch(2, obj_sw.ub_matrix)
    obj.store(g, order_val)
    oracle_plugin = obj
    p = pam_plugin(oracle, oracle_plugin, order_val, k, centroids)
    end = time.time()
    print("PARA", p.centroids)
    print("PARA", count, end - start, "\n\n")

    centroids = copy.copy(copy_centroids)
    p = pam_vanila(oracle, order_val, k, centroids)
    print("Original: ", p.centroids)
def helper_prims_plugin():
    global g, g_mat, order_val, full_mat, count
    count = 0
    obj_sw = SashaWang()
    obj_sw.store(g, order_val)
    obj = unified_graph_lb_ub()
    obj.store(g, order_val)
    # Sanity check: Sasha-Wang lower bounds are the tightest triangle-inequality
    # bounds, so the unified structure should never report a strictly tighter
    # lower bound (up to numerical tolerance).
    assert not np.any(
        (np.array(obj_sw.lb_matrix) - np.array(obj.lb_matrix)) < -0.0000001)

    start = time.time()
    obj_sw = SashaWang()
    obj_sw.store(g, order_val)
    oracle_plugin = obj_sw
    p = prims_plugin(order_val, oracle, oracle_plugin)
    p.mst(0)
    end = time.time()
    print("COUNT Sasha Wang", count, end - start, "\n\n")

    count = 0
    start = time.time()
    obj = unified_graph_lb_ub()
    obj.store(g, order_val)
    oracle_plugin = obj
    p = prims_plugin(order_val, oracle, oracle_plugin)
    p.mst(0)
    end = time.time()
    print("COUNT LBTree enabled", count, end - start, "\n\n")

    start = time.time()
    count = 0
    obj = ParamTriSearch(2, obj_sw.ub_matrix)
    obj.store(g, order_val)
    oracle_plugin = obj
    p = prims_plugin(order_val, oracle, oracle_plugin)
    p.mst(0)
    end = time.time()
    print("PARA", count, end - start, "\n\n")
class SW_and_LBUB_Oracle:
    def __init__(self):
        self.obj_sw = SashaWang()
        self.obj_lbub = unified_graph_lb_ub()
        self.is_uncalculated = self.obj_sw.is_uncalculated

    def store(self, graph, order_val):
        self.G = copy.copy(graph)
        self.obj_sw.store(graph, order_val)
        self.obj_lbub.store(self.G, order_val)

    def update(self, edge, val):
        self.obj_sw.update(edge, val)
        self.obj_lbub.update(edge, val)

    def lookup(self, x, y):
        sw_lb_ub = self.obj_sw.lookup(x, y)
        lbub_lb_ub = self.obj_lbub.lookup(x, y)
        if (lbub_lb_ub[0] - sw_lb_ub[0]) > 0.000000001:
            print("Found a better lower bound for {}, {}".format(x, y))
        if (-lbub_lb_ub[1] + sw_lb_ub[1]) > 0.000000001:
            print("Found a better upper bound for {}, {}".format(x, y))
        return lbub_lb_ub
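# Illustrative sketch (not part of the original experiments): how the combined
# SW_and_LBUB_Oracle above could be dropped into the same plugin harness used by
# the other helpers in this file. It assumes the module-level `g`, `order_val`,
# `count`, `oracle`, `prims_plugin`, and `Timer` are already set up exactly as in
# those helpers; the function name `helper_combined_oracle_demo` is hypothetical.
def helper_combined_oracle_demo():
    global g, order_val, count
    count = 0
    timer = Timer()
    timer.start()
    oracle_plugin = SW_and_LBUB_Oracle()
    oracle_plugin.store(g, order_val)  # seed both bound maintainers with the known edges
    p = prims_plugin(order_val, oracle, oracle_plugin)
    p.mst(0)  # any lookup() where LBUB beats SW is reported by the class above
    timer.end()
    print("COUNT combined SW+LBUB", count, timer.time_elapsed)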
def helper_lm_sampling(measure,
                       kind,
                       order_val,
                       n,
                       landmarks,
                       verbose=True,
                       sampling=True,
                       sw=False):
    print("Values for the run - order_val:{}, Samples:{}, landmarks:{}".format(
        order_val, n, landmarks))
    distance_measure = [
        'normal', 'uniform', 'zipf', 'data_flicker', 'data_sf', 'data_20'
    ]
    generation_algorithms = [
        'Geometric', 'Renyi Erdos', 'ForrestFire', 'Barabasi'
    ]
    """g_name usually looks like this -> "normal_distances_Barabasi_64.pkl"
       <distribution>_distances_<type-of-graph>_<#-of-nodes>.pkl
    """
    g_name = distance_measure[measure] + '_distances_' + generation_algorithms[
        kind] + '_' + str(order_val) + '.pkl'
    print('Path to the Input Graph File: {}'.format(
        os.path.join("..", "igraph", g_name)))
    graph = pickle.load(open(os.path.join("..", "igraph", g_name), 'rb'))
    # graph = {(0, 1): 0.3, (1, 2): 0.5, (2, 3): 0.5, (2, 4): 0.4, (4, 5): 0.6, (5, 6): 0.3}

    elm = EdgeLandMark(graph, n, order_val, Sampling=sampling)
    start = time.time()
    elm.find_paths()
    new_total_1 = elm.greedy_sampling(landmarks)
    print_string = "Sampling: {}\nNo of Nodes in Graph: {} \nTotal Known Edges in Graph: {} \nTotal Right Samples(" \
                   "n): {} \nTotal Left Edges(k): {}\n Sum Lower Bounds: {}\n Time Taken:{}\n".format(
                       sampling, order_val, len(graph), n, landmarks,
                       new_total_1, (time.time() - start) / 60)
    file_writer(verbose,
                g_name,
                n=n,
                landmarks=landmarks,
                pretext="ELM_",
                print_statement=print_string)
    # print(elm.greedyK)
    # elm = EdgeLandMark(graph, n, order_val, Sampling=False)
    # elm.find_paths()
    # elm.greedy_sampling(k)
    # if verbose:
    #     print("Sampling: False\nNo of Nodes in Graph: {} \nTotal Known Edges in Graph: "
    #           "{} \n ".format(order_val, len(graph)))
    #     print(elm.greedyK)

    if sw:
        obj_sw = SashaWang()
        start_sw = time.time()
        obj_sw.store(graph, order_val)
        print_statement_sw = "Time taken for SW on the graph is {}\n".format(
            (time.time() - start_sw) / 60)
        file_writer(verbose,
                    g_name,
                    n=n,
                    landmarks=landmarks,
                    pretext="SW_",
                    print_statement=print_statement_sw)
        new_total_1 = 0
        lb_total = 0
        for i in range(order_val):
            for j in range(i + 1, order_val):
                if obj_sw.matrix[i][j] != -1:
                    new_total_1 += obj_sw.matrix[i][j]
                else:
                    lb_total += obj_sw.lb_matrix[i][j]
        print("\nThe SW LB Sum(Given): {}, The LB Total: {}".format(
            new_total_1, lb_total))
def helper_navarro_plugin(g,
                          full_mat1,
                          g_mat,
                          measure,
                          kind,
                          order_val,
                          k,
                          algo,
                          three=False):
    global full_mat, count
    full_mat = full_mat1  # parameters g and g_mat are used as-is
    timer = Timer()

    timer.start()
    pr = vanila_knnrp(order_val, k, oracle)
    pr.knn_queries()
    # print("Plug-in(Nav): ", pr.NHA)
    timer.end()
    base_algo_results = "COUNT Without Plugin " + str(count) + " Time " + str(
        timer.time_elapsed) + "\n"
    print("COUNT Without Plugin ", count, timer.time_elapsed, "\n\n")

    timer.start()
    obj_sw = SashaWang()
    obj_sw.store(g, order_val)
    oracle_plugin = obj_sw
    p = knnrp(order_val, k, oracle, oracle_plugin)
    p.knn_queries()
    # print("Plug-in(Nav): ", p.NHA)
    timer.end()
    sasha_wang_results = "COUNT Sasha Wang " + str(count) + " Time " + str(
        timer.time_elapsed - obj_sw.update_time) + "\n"
    print("COUNT Sasha Wang ", count, timer.time_elapsed - obj_sw.update_time,
          "\n\n")

    """ DSS Solution Scheme """
    g = {}
    timer.start()
    obj_dss = DSS(g, order_val)
    oracle_plugin = obj_dss
    p = knnrp(order_val, k, oracle, oracle_plugin)
    p.knn_queries()
    timer.end()
    print("DSS Experiments - {}\nmeasure: {}, kind: {}, order_val: {}".format(
        algo, measure, kind, order_val))
    print("{} DSS COUNT {}\nTime: {}\n\n".format(algo, count,
                                                 timer.time_elapsed))

    """
    Jees added this part to test the naive code for intersection-TriSearch.
    It accepts a graph (edge-and-distance format), converts it into an
    adjacency-list representation, and stores it.
    It updates a new edge the moment it gets a new resolution.
    It computes the value of a lower bound only when needed by a query Q(a, b).
    It finds triangles through the intersection of the adjacency lists of both
    end points of the query edge (a & b).
    The class IntersectTriSearch is initialized with the graph and takes care of
    everything else, including conversion to the adjacency-list representation.
    """
    timer.start()
    oracle_plugin = IntersectTriSearch({}, order_val)
    p = knnrp(order_val, k, oracle, oracle_plugin)
    p.knn_queries()
    # print("Plug-in(Nav): ", p.NHA)
    timer.end()
    print(
        "IntersectionTriSearch Experiments - {}\nmeasure: {}, kind: {}, order_val: {}"
        .format(algo, measure, kind, order_val))
    print(
        "{} COUNT intersect TriSearch: {}\nLB Time(Tri): {}\nUB Time(Tri): {}\n\n"
        .format(algo, count, timer.time_elapsed - oracle_plugin.sp_time,
                oracle_plugin.sp_time))
    print("*" * 40)

    """ code to test the landmark-based methods """
    timer.start()
    oracle_plugin = LSS({}, order_val, 6, oracle)
    p = knnrp(order_val, k, oracle, oracle_plugin)
    p.knn_queries()
    timer.end()
    print("COUNT LSS: {}\nTime(LSS): {}\n\n".format(count, timer.time_elapsed))
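# Minimal sketch of the intersection-based triangle bounding idea described in
# the docstring above. This is not the repository's IntersectTriSearch class,
# only an illustration of the technique; it assumes the graph is a dict mapping
# an undirected edge (u, v) to its distance, as in the commented-out toy graph
# in helper_lm_sampling, and the function name is hypothetical.
def triangle_bounds_sketch(graph, a, b):
    # Build adjacency sets and a symmetric distance map on the fly;
    # the real class caches this representation once at store time.
    adj = {}
    dist = {}
    for (u, v), d in graph.items():
        adj.setdefault(u, set()).add(v)
        adj.setdefault(v, set()).add(u)
        dist[(u, v)] = dist[(v, u)] = d
    if (a, b) in dist:  # edge already resolved: bounds collapse to the value
        return dist[(a, b)], dist[(a, b)]
    lb, ub = 0.0, float('inf')
    # Every common neighbour c closes a triangle a-c-b; each triangle tightens
    # the bounds via the triangle inequality.
    for c in adj.get(a, set()) & adj.get(b, set()):
        d_ac, d_cb = dist[(a, c)], dist[(c, b)]
        lb = max(lb, abs(d_ac - d_cb))
        ub = min(ub, d_ac + d_cb)
    return lb, ub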
def helper_kruskals_plugin(g,
                           full_mat1,
                           g_mat,
                           measure,
                           kind,
                           order_val,
                           algo,
                           three=False):
    global full_mat, count
    full_mat = full_mat1  # parameters g and g_mat are used as-is
    timer = Timer()

    pr = vanila_kruskals(order_val, time_waste_oracle)
    timer.start()
    pr.mst()
    timer.end()
    base_algo_results = "COUNT Without Plugin " + str(count) + " Time " + str(
        timer.time_elapsed) + "\n"
    print("COUNT Without Plugin ", count, timer.time_elapsed, "\n\n")

    timer.start()
    obj_sw = SashaWang()
    obj_sw.store(g, order_val)
    oracle_plugin = obj_sw
    p = kruskals_plugin(order_val, oracle, oracle_plugin)
    p.mst()
    timer.end()
    print(
        "(KRUSKAL)Sasha Wang - Original Length: {}, our length: {}, measure: {}, kind: {}, order_val: {}"
        .format(pr.mst_path_length, p.mst_path_length, measure, kind,
                order_val))
    assert abs(p.mst_path_length - pr.mst_path_length) < 0.000001
    sasha_wang_results = "COUNT Sasha Wang " + str(count) + " Time " + str(
        timer.time_elapsed - obj_sw.update_time) + "\n"
    print("COUNT Sasha Wang ", count, timer.time_elapsed - obj_sw.update_time,
          "\n\n")

    """ DSS Solution Scheme """
    g = {}
    timer.start()
    obj_dss = DSS(g, order_val)
    oracle_plugin = obj_dss
    p = kruskals_plugin(order_val, oracle, oracle_plugin)
    p.mst()
    timer.end()
    assert abs(p.mst_path_length - pr.mst_path_length) < 0.000001
    print(
        "Plugin with DSS\nActual(SW) {} Path Length: {}\nDSS Path Length: {}\n"
        "measure: {}, kind: {}, order_val: {}".format(algo, p.mst_path_length,
                                                      pr.mst_path_length,
                                                      measure, kind,
                                                      order_val))
    print("DSS COUNT {}\nTime: {}\n\n".format(count, timer.time_elapsed))

    """
    Jees added this part to test the naive code for intersection-TriSearch.
    It accepts a graph (edge-and-distance format), converts it into an
    adjacency-list representation, and stores it.
    It updates a new edge the moment it gets a new resolution.
    It computes the value of a lower bound only when needed by a query Q(a, b).
    It finds triangles through the intersection of the adjacency lists of both
    end points of the query edge (a & b).
    The class IntersectTriSearch is initialized with the graph and takes care of
    everything else, including conversion to the adjacency-list representation.
    """
    timer.start()
    oracle_plugin = IntersectTriSearch({}, order_val)
    p = kruskals_plugin(order_val, oracle, oracle_plugin)
    p.mst()
    timer.end()
    print(
        "KRUSKAL - IntersectionTriSearch Experiments:\nActual(Vanila) KRUSKAL Path Length: {}\nIntersectionTriSearch(our plugin) Kruskals Path Length: {}\nmeasure: {}, kind: {}, order_val: {}"
        .format(pr.mst_path_length, p.mst_path_length, measure, kind,
                order_val))
    print(
        "COUNT intersect TriSearch: {}\nLB Time(Tri): {}\nUB Time(Tri): {}\n\n"
        .format(count, timer.time_elapsed - oracle_plugin.sp_time,
                oracle_plugin.sp_time))
    print("My Lookup count(Tri): {}".format(oracle_plugin.lookup_count))

    """ code to test the landmark-based methods """
    timer.start()
    oracle_plugin = LSS({}, order_val, 6, oracle)
    p = kruskals_plugin(order_val, oracle, oracle_plugin)
    p.mst()
    timer.end()
    print("{} COUNT LSS: {}\nTime(LSS): {}\n\n".format(algo, count,
                                                       timer.time_elapsed))
def helper_prims_plugin(num_nodes):
    # NOTE: redefines the zero-argument helper_prims_plugin above; only this
    # version is visible when both are defined in the same module.
    print(num_nodes)
    graph_maker = NlogNGraphMaker(num_nodes)
    graph = graph_maker.get_nlogn_edges()
    obj_sw = SashaWang()
    obj_sw.store(graph, num_nodes)
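# Illustrative driver (not part of the original file): the zero-argument helpers
# above read the module-level globals g, g_mat, order_val, full_mat and count,
# and call a module-level `oracle`. A run presumably looks roughly like the
# sketch below; the pickle file names and `_get_matrix_from_adj_dict` are taken
# from helper_sasha_wang_saver / helper_tester, while the rest of the wiring is
# an assumption, not the authors' actual entry point.
if __name__ == '__main__':
    order_val = 512
    full_mat = pickle.load(open(os.path.join('igraph', 'normal512.pkl'), 'rb'))
    g = pickle.load(
        open(os.path.join('igraph', 'normal_distances_Geometric_512.pkl'),
             'rb'))
    g_mat = _get_matrix_from_adj_dict(g, order_val)
    count = 0
    helper_prims_plugin(order_val)  # NlogN-graph variant defined just above
    helper_pam_plugin(k=5)          # PAM comparison across SW / LBT / ParamTriSearch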