def communicative_basic(train_file, test_file, submission_file, num_predictions):
    '''
    communicative benchmark: suggest the users who follow a test node but
    are not followed back, ranked by node credit (in-degree)
    '''
    
    print ">>> reading the graph from file ...",
    graph = {}
    graph = utilities.read_graph(train_file)
    print " done!"
    print ">> the graph contains %d ndoes" % len(graph)
    
    print ">>> building the edge set ...",
    edgeSet = set()
    nodeCredit = {}
    for node in graph.keys():
        nodeCredit[node] = 0
        for frdNode in graph[node]:
            edgeSet.add((node,frdNode))
    print "done!"
    
    def compareCredit(key):
        '''
        key function: return the credit of the given node, used for sorting
        '''
        return nodeCredit[key]
    
    
    missingEdgeSet = set()
    print ">>> reversing the edge set, computing the credicts of each node and finding missing edges ...",
    for edge in edgeSet:
        if (edge[1], edge[0]) not in edgeSet:
            missingEdgeSet.add((edge[1], edge[0]))
        nodeCredit[edge[1]]+=1
    print " done!"
    
    testResult = {}
    testNodeList = utilities.read_nodes_list(test_file)
    testNodeSet = set(testNodeList)
    print ">> %d test Nodes read." % len(testNodeList)
    print ">>> making the missing edge dictionary for test nodes ...",
    for testNode in testNodeList: # pre-build the dictionary
        testResult[testNode] = []
    
    for edge in missingEdgeSet:
        if (edge[0] in testNodeSet):
            testResult[edge[0]].append(edge[1])
    print " done!"
    
    print ">>> sorting the final results according to node credits ...",
    for testNode in testNodeList:
        testResult[testNode].sort(key=compareCredit, reverse=True)
    print " done!"
    
    print ">>> outputing the final result ...",
    utilities.write_submission_file(submission_file, testNodeList, [testResult[testNode] for testNode in testNodeList])
    print " done!"
def bfs_benchmark(train_file, test_file, submission_file, num_predictions):
    """
    Runs the breadth-first search benchmark.
    """
    graph = utilities.read_graph(train_file)
    test_nodes = utilities.read_nodes_list(test_file)
    test_predictions = [breadth_first_search(graph, node, num_predictions)
                        for node in test_nodes]
    utilities.write_submission_file(submission_file, 
                                    test_nodes, 
                                    test_predictions)
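
bfs_benchmark assumes a breadth_first_search helper that returns up to num_predictions nodes in breadth-first order from the source node (the source itself excluded). A minimal sketch under that assumption:

from collections import deque

def breadth_first_search(graph, node, num_predictions):
    # hypothetical sketch: collect the first num_predictions nodes
    # reachable from `node`, in BFS order, skipping the source
    visited = set([node])
    queue = deque([node])
    predictions = []
    while queue and len(predictions) < num_predictions:
        current = queue.popleft()
        for neighbor in graph.get(current, []):
            if neighbor not in visited:
                visited.add(neighbor)
                predictions.append(neighbor)
                queue.append(neighbor)
    return predictions[:num_predictions]
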
def jaccard_benchmark(train_file, test_file, submission_file, num_predictions):
    """
    Runs the Jaccard similarity benchmark.
    """

    start_time = time.time()
    (graph, reversegraph) = utilities.read_graph(train_file)
    print "Graph forming time = ", time.time() - start_time, "seconds"
    start_time = time.time()
    test_nodes = utilities.read_nodes_list(test_file)
    test_predictions = [jaccard_search(graph, reversegraph, node, num_predictions) for node in test_nodes]

    print "Prediction time = ", time.time() - start_time, "seconds"

    utilities.write_submission_file(submission_file, test_nodes, test_predictions)
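
jaccard_search is assumed to rank candidate nodes by the Jaccard coefficient of their follower sets, |A ∩ B| / |A ∪ B|. A minimal sketch, assuming graph maps a node to the users it follows and reversegraph maps a node to its followers (both assumptions):

def jaccard_search(graph, reversegraph, node, num_predictions):
    # hypothetical sketch: score friends-of-friends by the Jaccard
    # similarity of follower sets, return the top-scoring candidates
    neighbors = set(graph.get(node, []))
    candidates = set()
    for n in neighbors:
        candidates.update(graph.get(n, []))
    candidates -= neighbors
    candidates.discard(node)
    followers = set(reversegraph.get(node, []))
    scores = {}
    for c in candidates:
        c_followers = set(reversegraph.get(c, []))
        union = followers | c_followers
        scores[c] = float(len(followers & c_followers)) / len(union) if union else 0.0
    ranked = sorted(scores, key=scores.get, reverse=True)
    return ranked[:num_predictions]
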
def main():
    nx_graph = utilities.read_graph(FILE_PATH)
    print "NetworkX Directed Graph (V,E): (", nx_graph.number_of_nodes(), ",", nx_graph.number_of_edges(), ")"

    # Create the Personalized pagerank class object
    ppr = PersonalizedPageRank(nx_graph)

    # For each node, build its comparison list from the nodes with the best personalized PageRank scores
    to_compare_list = {}
    for user_id in nx_graph.nodes():
        if user_id % 100 == 1:
            print "Running for", user_id

        to_compare_list[user_id] = ppr.get_page_rank(user_id)

    save_to_compare_list_to_file(to_compare_list, nx_graph, "to_compare_list.txt")
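
PersonalizedPageRank is assumed to wrap NetworkX's pagerank with a personalization vector concentrated on the query node. A minimal sketch under that assumption (the class name and method signature mirror the usage above; top_k is an invented parameter):

import networkx as nx

class PersonalizedPageRank(object):
    def __init__(self, graph, top_k=10):
        self.graph = graph
        self.top_k = top_k

    def get_page_rank(self, user_id):
        # hypothetical sketch: restart the random walk at user_id only,
        # then return the highest-scoring other nodes
        personalization = dict((n, 0.0) for n in self.graph.nodes())
        personalization[user_id] = 1.0
        scores = nx.pagerank(self.graph, alpha=0.85,
                             personalization=personalization)
        ranked = sorted(scores, key=scores.get, reverse=True)
        return [n for n in ranked if n != user_id][:self.top_k]
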
Example #6
def main(graph_path, output_path):
    # Read in event graph
    graph = read_graph(graph_path)

    # Generate a new event graph with consecutive barrier nodes merged
    new_graph = merge_barriers(graph)

    # If no output path is given, construct default output path
    if output_path is None:
        output_dir = os.path.dirname(graph_path)
        name, ext = os.path.splitext(os.path.basename(graph_path))
        output_graph_name = name + "_merged_barriers"
        output_path = os.path.join(output_dir, output_graph_name + ext)

    # Write new graph out to file
    new_graph.write(output_path, format="graphml")
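
merge_barriers is not shown; below is a heavily simplified sketch using python-igraph's contract_vertices, assuming vertex order reflects event order and barrier vertices carry an "event_type" attribute equal to "barrier" (both assumptions; the real merge logic is likely per-rank):

def merge_barriers(graph):
    # hypothetical sketch: collapse runs of consecutive barrier vertices
    # into a single vertex, keeping the first vertex's attributes
    mapping = []
    new_id = -1
    prev_was_barrier = False
    for v in graph.vs:
        is_barrier = (v["event_type"] == "barrier")
        if is_barrier and prev_was_barrier:
            mapping.append(new_id)  # merge into the previous barrier
        else:
            new_id += 1
            mapping.append(new_id)
        prev_was_barrier = is_barrier
    graph.contract_vertices(mapping, combine_attrs="first")
    graph.simplify()  # drop duplicate edges and self-loops created by merging
    return graph
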
def main_entrance(train_data_file, test_data_file, submit_data_file):
    '''
    the main entrance of the program
    '''
    ###############Configs#################
    minMutualFrd = 2
    ############End of Configs#############
    
    print ">>> reading the graph from file ...",
    following_graph = utilities.read_graph(train_data_file)
    print " done!"
    print ">> the graph contains %d ndoes" % len(following_graph)
    
    print ">>> reading test nodes ...",
    testNodeList = utilities.read_nodes_list(test_data_file)
    print " done!"
    
    edgeSet = get_edge_set(following_graph)
    nodeCredit = get_node_credit(edgeSet, following_graph.keys())
    commu_missingEdgeDict = get_commu_missing_edge(edgeSet, testNodeList)
    mutual_missingEdgeDict = get_mutual_missing_edge(following_graph, testNodeList, edgeSet, following_graph.keys(), minMutualFrd)
    
    # union two edge dicts
    finalPrediction = {}
    for node in testNodeList:
        finalPrediction[node] = list(set(mutual_missingEdgeDict[node]) | set(commu_missingEdgeDict[node]))
    
    # key function for ranking the final predictions
    def compareCredit(key):
        '''
        return the credit of the given node, used for sorting
        '''
        return nodeCredit[key]
    
    # rank the predictions
    print ">>> sorting the final results according to node credits ...",
    for testNode in testNodeList:
        finalPrediction[testNode].sort(key=compareCredit, reverse=True)
    print " done!"
    
    # write prediction to file
    print ">>> outputing the final result ...",
    utilities.write_submission_file(submit_data_file, testNodeList, [finalPrediction[testNode] for testNode in testNodeList])
    print " done!"
Example #8
def main( slice_dir, transform, output_dir ):
    # Set up transformed slice dir
    if output_dir is None:
        output_dir = str(pathlib.Path(slice_dir).parent) + "/transformed_slices_" + transform + "/"
    if not os.path.isdir( output_dir ):
        pathlib.Path( output_dir ).mkdir( parents=True, exist_ok=True )
    # Compute slice-to-rank assignment
    assignment = assign_slices( slice_dir )
    # Each rank ingests its slices
    idx_to_slice = { idx:read_graph(path) for idx,path in assignment.items() }
    # Each rank transforms its slices
    if transform == "comm_channel":
        idx_to_transformed = { idx:transform_to_communication_channel_graph(s) for idx,s in idx_to_slice.items() }
    else:
        raise NotImplementedError("Event Graph Transform: {} is not implemented".format(transform))
    # And writes them out
    for idx,ts in idx_to_transformed.items():
        output_path = output_dir + "/transformed_slice_" + str(idx) + ".graphml"
        ts.write( output_path, format="graphml" )
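
assign_slices is not shown; the usage above and the slice_<idx>.graphml naming used elsewhere in this code suggest it maps slice indices to file paths. A minimal serial sketch (the real version presumably partitions slices across ranks):

import glob
import os
import re

def assign_slices(slice_dir):
    # hypothetical sketch: one entry per slice_<idx>.graphml file
    assignment = {}
    for path in glob.glob(os.path.join(slice_dir, "slice_*.graphml")):
        match = re.search(r"slice_(\d+)\.graphml$", path)
        if match:
            assignment[int(match.group(1))] = path
    return assignment
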
def communicative_basic(train_file, test_file, submission_file, num_predictions):
    '''
    basic communicative benchmark: suggest the users who follow a test node
    but are not followed back (unranked variant of communicative_basic above)
    '''
    
    print ">>> reading the graph from file ...",
    graph = {}
    graph = utilities.read_graph(train_file)
    print " done!"
    print ">> the graph contains %d ndoes" % len(graph)
    
    print ">>> building the edge set ...",
    edgeSet = set()
    for node in graph.keys():
        for frdNode in graph[node]:
            edgeSet.add((node,frdNode))
    print "done!"
    
    missingEdgeSet = set()
    print ">>> reversing the edge set, finding missing edges ...",
    for edge in edgeSet:
        if (edge[1], edge[0]) not in edgeSet:
            missingEdgeSet.add((edge[1], edge[0]))
    print " done!"
    
    testResult = {}
    testNodeList = utilities.read_nodes_list(test_file)
    testNodeSet = set(testNodeList)
    print ">> %d test Nodes read." % len(testNodeList)
    print ">>> making the missing edge dictionary for test nodes ...",
    for testNode in testNodeList: # pre-build the dictionary
        testResult[testNode] = []
    
    for edge in missingEdgeSet:
        if (edge[0] in testNodeSet):
            testResult[edge[0]].append(edge[1])
    print " done!"
    
    print ">>> outputing the final result ...",
    utilities.write_submission_file(submission_file, testNodeList, [testResult[testNode] for testNode in testNodeList])
    print " done!"
def main():
    nx_graph = utilities.read_graph(FILE_PATH)
    print "NetworkX Directed Graph (V,E): (", nx_graph.number_of_nodes(), ",", nx_graph.number_of_edges(), ")"

    data = get_data(FILE_NAME, ",")

    features_list = []
    for node_pair in data:
        neighbour_feature_list = Features.get_all_features(nx_graph, int(node_pair[0]), int(node_pair[1]))

        features = []
        features.append(int(node_pair[0]))
        features.append(int(node_pair[1]))
        features.append(node_pair[2])
        features.extend(neighbour_feature_list)
        features.append(node_pair[3])
        features_list.append(features)

    a = np.asarray(features_list)
    print a.shape
    np.savetxt("features_data.csv", a, delimiter=",")
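
get_data is assumed to read a delimited file into rows of string fields; each row here carries two node ids plus two extra columns whose meaning the source does not spell out. A minimal sketch:

def get_data(file_name, delimiter):
    # hypothetical sketch: one row per line, split on the delimiter
    with open(file_name) as f:
        return [line.strip().split(delimiter) for line in f if line.strip()]
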
def get_slice_data(slice_dirs, slice_idx, kernel_params, callstacks_available):
    print("Ingesting subgraphs for slice: {}".format(slice_idx))
    slice_subgraph_paths = [
        str(sd) + "/slice_" + str(slice_idx) + ".graphml" for sd in slice_dirs
    ]
    #slice_subgraphs = read_graphs_parallel( slice_subgraph_paths )
    slice_subgraphs = [read_graph(g) for g in slice_subgraph_paths]

    # Compute extra labels (e.g., logical time increment)
    slice_subgraphs = [compute_extra_labels(g) for g in slice_subgraphs]

    # Compute the requested kernel distance matrices
    print("Computing kernel distances for slice: {}".format(slice_idx))
    kernel_distance_data = compute_kernel_distance_matrices(
        slice_subgraphs, kernel_params)

    # Extract wall-time information for correlating with application events
    print("Extracting wall-time data for slice: {}".format(slice_idx))
    wall_time_data = extract_wall_time_data(slice_subgraphs)

    # Extract callstack data if available
    if callstacks_available:
        print("Extracting callstack data for slice: {}".format(slice_idx))
        callstack_data = extract_callstack_data(slice_subgraphs)
    else:
        callstack_data = {}

    slice_data = {
        "kernel_distance": kernel_distance_data,
        "wall_time": wall_time_data,
        "callstack": callstack_data
    }

    #for k,d in kernel_distance_data.items():
    #    if np.count_nonzero( d ) > 0:
    #        pprint.pprint( slice_data )
    #        exit()

    return slice_data
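
extract_wall_time_data is not shown; a minimal sketch, assuming igraph-style subgraphs whose vertices carry a "wall_time" attribute (attribute name assumed):

def extract_wall_time_data(subgraphs):
    # hypothetical sketch: map subgraph index to its vertex wall times
    return {idx: list(g.vs["wall_time"]) for idx, g in enumerate(subgraphs)}
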
Example #13
def main(graph_path, barrier_adjustment, take_slice, lower_bound, upper_bound,
         ranks, partials):
    graph = read_graph(graph_path)
    if take_slice:
        graph = extract_slice(graph, lower_bound, upper_bound, ranks, partials)
    visualize(graph, barrier_adjustment)
Example #14
import OLSF
import curate
import collections
import random
import utilities


# the first line of each embedding file (the dimension header) needs to be
# removed, and the dataset needs to be labeled
filepath = '../data/graph_dblp'
emb_path1 = '../embeddings/node2vec.emb'
emb_path2 = '../embeddings/deepwalk.emb'
supergraph_path = '../data/graph_supergraph'


# read and sample the supergraph
g = utilities.read_graph(supergraph_path)
super_subgraph = curate.sample_connected_component(g, 'Xindong Wu', 500)

#alg1 = n2v.node2vec(g,False)
#alg1.learn_embeddings()
#data = utilities.read_embedding(emb_path1)
#alg2 = dw.deepwalk(g,False)
#alg2.learn_embeddings()


# start by drawing a set of publication supernodes from super_subgraph:
num_samples = 50
ai_samples = []
db_samples = []

for node in super_subgraph:
Example #15
    count = 0
    for node in test_nodes:
        suggested = suggest_friends(follow, followed, clf, node,
            popular_people, max_suggestion)
        predictions.append(suggested)

        count += 1
        if count % 100 == 0:
            print 'Suggested %d friends.' % count

    print 'Writing submission files...'
    utilities.write_submission_file(submission_file, test_nodes, predictions)

if __name__ == '__main__':
    start_time = time.time()
    follow, followed = utilities.read_graph('./data/train.csv')

    validation.generate_test_set(follow, followed,
         './data/test.csv',
         './data/validation.csv',
         './data/solution.csv',
         2000, 10)

    main(follow, followed,
         './data/validation.csv',
         './data/result.csv',
         './data/data.csv',
         './data/data_test.csv',
         10)

    # main(follow, followed,
    #      './data/test.csv',
    #      './data/result.csv',
    #      './data/data.csv',
    #      10)

    print (time.time() - start_time) / 60.0, 'minutes'
def main():
    nx_graph = utilities.read_graph(FILE_PATH)
    print "NetworkX Directed Graph (V,E): (", nx_graph.number_of_nodes(), ",", nx_graph.number_of_edges(), ")"

    get_shortest_path(nx_graph)