def communicative_basic(train_file, test_file, submission_file, num_predictions): ''' main function ''' print ">>> reading the graph from file ...", graph = {} graph = utilities.read_graph(train_file) print " done!" print ">> the graph contains %d ndoes" % len(graph) print ">>> building the edge set ...", edgeSet = set() nodeCredit = {} for node in graph.keys(): nodeCredit[node] = 0 for frdNode in graph[node]: edgeSet.add((node,frdNode)) print "done!" def compareCredit(key): ''' utility function to comapre the two credits given the key ''' return nodeCredit[key] missingEdgeSet = set() print ">>> reversing the edge set, computing the credicts of each node and finding missing edges ...", for edge in edgeSet: if (edge[1], edge[0]) not in edgeSet: missingEdgeSet.add((edge[1], edge[0])) nodeCredit[edge[1]]+=1 print " done!" testResult = {} testNodeList = utilities.read_nodes_list(test_file) testNodeSet = set(testNodeList) print ">> %d test Nodes read." % len(testNodeList) print ">>> making the missing edge dictionary for test nodes ...", for testNode in testNodeList: # pre-build the dictionary testResult[testNode] = [] for edge in missingEdgeSet: if (edge[0] in testNodeSet): testResult[edge[0]].append(edge[1]) print " done!" print ">>> sorting the final results according to node credits ...", for testNode in testNodeList: testResult[testNode].sort(key=compareCredit, reverse=True) print " done!" print ">>> outputing the final result ...", utilities.write_submission_file(submission_file, testNodeList, [testResult[testNode] for testNode in testNodeList]) print " done!"
def bfs_benchmark(train_file, test_file, submission_file, num_predictions):
    """
    Runs the breadth-first search benchmark.

    Loads the graph and the test nodes, calls breadth_first_search once per
    test node (passing num_predictions through) and writes the collected
    predictions with utilities.write_submission_file.
    """
    graph = utilities.read_graph(train_file)
    test_nodes = utilities.read_nodes_list(test_file)

    test_predictions = []
    for node in test_nodes:
        test_predictions.append(
            breadth_first_search(graph, node, num_predictions))

    utilities.write_submission_file(submission_file,
                                    test_nodes,
                                    test_predictions)
def jaccard_benchmark(train_file, test_file, submission_file, num_predictions): """ Runs the breadth-first search benchmark. """ start_time = time.time() (graph, reversegraph) = utilities.read_graph(train_file) print "Graph forming time = ", time.time() - start_time, "seconds" start_time = time.time() test_nodes = utilities.read_nodes_list(test_file) test_predictions = [jaccard_search(graph, reversegraph, node, num_predictions) for node in test_nodes] print "Prediction time = ", time.time() - start_time, "seconds" utilities.write_submission_file(submission_file, test_nodes, test_predictions)
def main(): nx_graph = utilities.read_graph(FILE_PATH) print "NetworkX Directed Graph (V,E): (", nx_graph.number_of_nodes(), ",", nx_graph.number_of_edges(), ")" # Create the Personalized pagerank class object ppr = PersonalizedPageRank(nx_graph) # Calculate the to compare list for each node by selecting the nodes with the best personalized pagerank score for each node to_compare_list = {} for user_id in nx_graph.nodes(): if user_id % 100 == 1: print "Running for", user_id to_compare_list[user_id] = ppr.get_page_rank(user_id) save_to_compare_list_to_file(to_compare_list, nx_graph, "to_compare_list.txt")
def main(graph_path, output_path):
    """
    Merge consecutive barrier nodes of an event graph and write the result.

    graph_path  -- path of the input graph, handed to read_graph
    output_path -- destination file; when None, the output is written next to
                   the input as "<name>_merged_barriers<ext>"

    The merged graph is written in GraphML format.
    """
    # Read in event graph
    graph = read_graph(graph_path)

    # Generate a new event graph with consecutive barrier nodes merged
    new_graph = merge_barriers(graph)

    # If no output path is given, construct default output path
    if output_path is None:
        output_dir = os.path.dirname(graph_path)
        name, ext = os.path.splitext(os.path.basename(graph_path))
        # os.path.join keeps the path relative when graph_path has no
        # directory part; plain "/" concatenation would have pointed the
        # output at the filesystem root in that case.
        output_path = os.path.join(output_dir, name + "_merged_barriers" + ext)

    # Write new graph out to file
    new_graph.write(output_path, format="graphml")
def main_entrance(train_data_file, test_data_file, submit_data_file): ''' the main entrance of the program ''' ###############Configs################# minMutualFrd = 2 ############End of Configs############# print ">>> reading the graph from file ...", following_graph = utilities.read_graph(train_data_file) print " done!" print ">> the graph contains %d ndoes" % len(following_graph) print ">>> reading test nodes ...", testNodeList = utilities.read_nodes_list(test_data_file) print " done!" edgeSet = get_edge_set(following_graph) nodeCredit = get_node_credit(edgeSet, following_graph.keys()) commu_missingEdgeDict = get_commu_missing_edge(edgeSet, testNodeList) mutual_missingEdgeDict = get_mutual_missing_edge(following_graph, testNodeList, edgeSet, following_graph.keys(), minMutualFrd) # union two edge dicts finalPrediction = {} for node in testNodeList: finalPrediction[node] = list(set(mutual_missingEdgeDict[node]) | set(commu_missingEdgeDict[node])) # customized comparator for final prediction def compareCredit(key): ''' utility function to comapre the two credits given the key ''' return nodeCredit[key] # rank the predictions print ">>> sorting the final results according to node credits ...", for testNode in testNodeList: finalPrediction[testNode].sort(key=compareCredit, reverse=True) print " done!" # write prediction to file print ">>> outputing the final result ...", utilities.write_submission_file(submit_data_file, testNodeList, [finalPrediction[testNode] for testNode in testNodeList]) print " done!"
def main( slice_dir, transform, output_dir ):
    """
    Apply an event-graph transform to every assigned slice and write the results.

    slice_dir  -- directory handed to assign_slices
    transform  -- name of the transform; only "comm_channel" is implemented
    output_dir -- directory for the transformed slices; when None, a sibling
                  of slice_dir named "transformed_slices_<transform>" is used

    Raises NotImplementedError for any other transform name.  The check runs
    before the output directory is created or any slice is read, so an
    unsupported request fails without doing that work first.
    """
    # Reject unsupported transforms up front
    if transform != "comm_channel":
        raise NotImplementedError("Event Graph Transform: {} is not implemented".format(transform))

    # Set up transformed slice dir
    if output_dir is None:
        output_dir = os.path.join( str(pathlib.Path(slice_dir).parent),
                                   "transformed_slices_" + transform )
    # exist_ok=True makes a separate "does it exist" test unnecessary
    pathlib.Path( output_dir ).mkdir( parents=True, exist_ok=True )

    # Compute slice-to-rank assignment
    assignment = assign_slices( slice_dir )

    # Each rank ingests its slices
    idx_to_slice = { idx:read_graph(path) for idx,path in assignment.items() }

    # Each rank transforms its slices
    idx_to_transformed = { idx:transform_to_communication_channel_graph(s)
                           for idx,s in idx_to_slice.items() }

    # And writes them out
    for idx,ts in idx_to_transformed.items():
        # os.path.join avoids a doubled separator and keeps an empty
        # output_dir from resolving to the filesystem root
        output_path = os.path.join( output_dir,
                                    "transformed_slice_" + str(idx) + ".graphml" )
        ts.write( output_path, format="graphml" )
def jaccard_benchmark(train_file, test_file, submission_file, num_predictions): """ Runs the breadth-first search benchmark. """ start_time = time.time() (graph, reversegraph) = utilities.read_graph(train_file) print "Graph forming time = ", time.time() - start_time, "seconds" start_time = time.time() test_nodes = utilities.read_nodes_list(test_file) test_predictions = [ jaccard_search(graph, reversegraph, node, num_predictions) for node in test_nodes ] print "Prediction time = ", time.time() - start_time, "seconds" utilities.write_submission_file(submission_file, test_nodes, test_predictions)
def communicative_basic(train_file, test_file, submission_file, num_predictions): ''' main function ''' print ">>> reading the graph from file ...", graph = {} graph = utilities.read_graph(train_file) print " done!" print ">> the graph contains %d ndoes" % len(graph) print ">>> building the edge set ...", edgeSet = set() for node in graph.keys(): for frdNode in graph[node]: edgeSet.add((node,frdNode)) print "done!" missingEdgeSet = set() print ">>> reversing the edge set, finding missing edges ...", for edge in edgeSet: if (edge[1], edge[0]) not in edgeSet: missingEdgeSet.add((edge[1], edge[0])) print " done!" testResult = {} testNodeList = utilities.read_nodes_list(test_file) testNodeSet = set(testNodeList) print ">> %d test Nodes read." % len(testNodeList) print ">>> making the missing edge dictionary for test nodes ...", for testNode in testNodeList: # pre-build the dictionary testResult[testNode] = [] for edge in missingEdgeSet: if (edge[0] in testNodeSet): testResult[edge[0]].append(edge[1]) print " done!" print ">>> outputing the final result ...", utilities.write_submission_file(submission_file, testNodeList, [testResult[testNode] for testNode in testNodeList]) print " done!"
def main(): nx_graph = utilities.read_graph(FILE_PATH) print "NetworkX Directed Graph (V,E): (", nx_graph.number_of_nodes(), ",", nx_graph.number_of_edges(), ")" data = get_data(FILE_NAME, ",") features_list = [] for node_pair in data: neighbour_feature_list = Features.get_all_features(nx_graph, int(node_pair[0]), int(node_pair[1])) features = [] features.append(int(node_pair[0])) features.append(int(node_pair[1])) features.append(node_pair[2]) features.extend(neighbour_feature_list) features.append(node_pair[3]) features_list.append(features) a = np.asarray(features_list) print a.shape np.savetxt("features_data.csv", a, delimiter=",")
def get_slice_data(slice_dirs, slice_idx, kernel_params, callstacks_available):
    """
    Gather the per-slice analysis data for slice number slice_idx.

    slice_dirs           -- iterable of directories, each expected to hold
                            "slice_<slice_idx>.graphml"
    slice_idx            -- index of the slice to load
    kernel_params        -- forwarded to compute_kernel_distance_matrices
    callstacks_available -- when true, callstack data is extracted as well

    Returns a dict with the keys "kernel_distance", "wall_time" and
    "callstack"; "callstack" is an empty dict when callstacks_available is
    false.
    """
    print("Ingesting subgraphs for slice: {}".format(slice_idx))
    paths = [ str(sd) + "/slice_" + str(slice_idx) + ".graphml"
              for sd in slice_dirs ]
    # NOTE: a parallel reader (read_graphs_parallel) was tried here in the
    # past; the sequential read below is what is currently used.
    subgraphs = list(map(read_graph, paths))

    # Compute extra labels (e.g., logical time increment)
    subgraphs = list(map(compute_extra_labels, subgraphs))

    # Compute the requested kernel distance matrices
    print("Computing kernel distances for slice: {}".format(slice_idx))
    kernel_distance_data = compute_kernel_distance_matrices(subgraphs,
                                                            kernel_params)

    # Extract wall-time information for correlating with application events
    print("Extracting wall-time data for slice: {}".format(slice_idx))
    wall_time_data = extract_wall_time_data(subgraphs)

    # Extract callstack data if available
    callstack_data = {}
    if callstacks_available:
        print("Extracting callstack data for slice: {}".format(slice_idx))
        callstack_data = extract_callstack_data(subgraphs)

    return { "kernel_distance": kernel_distance_data,
             "wall_time": wall_time_data,
             "callstack": callstack_data }
def main(graph_path, barrier_adjustment, take_slice, lower_bound, upper_bound, ranks, partials):
    """
    Read an event graph and visualize it, optionally restricted to a slice.

    When take_slice is true the graph is first narrowed with extract_slice
    using lower_bound, upper_bound, ranks and partials; otherwise the full
    graph is shown.  barrier_adjustment is forwarded to visualize.
    """
    full_graph = read_graph(graph_path)
    if take_slice:
        shown_graph = extract_slice(full_graph, lower_bound, upper_bound,
                                    ranks, partials)
    else:
        shown_graph = full_graph
    visualize(shown_graph, barrier_adjustment)
import OLSF import curate import collections import random # first line of embedding needs to be removed, dimensions # dataset needs to be labeled filepath = '../data/graph_dblp' emb_path1 = '../embeddings/node2vec.emb' emb_path2 = '../embeddings/deepwalk.emb' supergraph_path = '../data/graph_supergraph' # read and sample the supergraph g = utilities.read_graph(supergraph_path) super_subgraph = curate.sample_connected_component(g, 'Xindong Wu', 500) #alg1 = n2v.node2vec(g,False) #alg1.learn_embeddings() #data = utilities.read_embedding(emb_path1) #alg2 = dw.deepwalk(g,False) #alg2.learn_embeddings() # start with drawing a set of publication supernode from super_subgraph: num_samples = 50 ai_samples = [] db_samples = [] for node in super_subgraph:
count = 0 for node in test_nodes: suggested = suggest_friends(follow, followed, clf, node, popular_people, max_suggestion) predictions.append(suggested) count += 1 if count % 100 == 0: print 'Suggested %d friends.' % count print 'Writing submission files...' utilities.write_submission_file(submission_file, test_nodes, predictions) if __name__ == '__main__': start_time = time.time() follow, followed = utilities.read_graph('./data/train.csv') validation.generate_test_set(follow, followed, './data/test.csv', './data/validation.csv', './data/solution.csv', 2000, 10) main(follow, followed, './data/validation.csv', './data/result.csv', './data/data.csv', './data/data_test.csv', 10) # main(follow, followed,
def main(): nx_graph = utilities.read_graph(FILE_PATH) print "NetworkX Directed Graph (V,E): (", nx_graph.number_of_nodes(), ",", nx_graph.number_of_edges(), ")" get_shortest_path(nx_graph)
for node in test_nodes: suggested = suggest_friends(follow, followed, clf, node, popular_people, max_suggestion) predictions.append(suggested) count += 1 if count % 100 == 0: print 'Suggested %d friends.' % count print 'Writing submission files...' utilities.write_submission_file(submission_file, test_nodes, predictions) if __name__ == '__main__': start_time = time.time() follow, followed = utilities.read_graph('./data/train.csv') validation.generate_test_set(follow, followed, './data/test.csv', './data/validation.csv', './data/solution.csv', 2000, 10) main(follow, followed, './data/validation.csv', './data/result.csv', './data/data.csv', './data/data_test.csv', 10) # main(follow, followed, # './data/test.csv', # './data/result.csv', # './data/data.csv', # 10) print(time.time() - start_time) / 60.0, 'minutes'