def run_pagerank_job(path_to_file):
    """Runs PageRank on the specified graph using graphlab's API.

    The graph is loaded in 'snap' format and PageRank is run for at most
    20 iterations. The model summary is printed after timing stops.

    Parameters
    ----------
    path_to_file : String type
        The path leading to the edge list file

    Returns
    -------
    runtime : String type
        The total runtime of the job (graph loading plus PageRank)
    """

    start = time.time()

    # Load from the caller-supplied path.
    g = gl.load_graph(path_to_file, 'snap')

    pr = gl.pagerank.create(g, max_iterations=20)

    end = time.time()

    # Printing is deliberately left outside the timed region.
    print(pr.summary())

    return "Total runtime: {} seconds".format(end - start)
def run_ndegree_neigh_job(path_to_file, source_vertex, degree):
    """Finds the nth degree neighborhood on the specified graph using graphlab's API.

    The neighborhood is computed by running single-source shortest path from
    ``source_vertex`` and keeping the vertices whose distance equals
    ``degree``.

    NOTE(review): the original description said this case ignores edge
    direction; that depends on how shortest_path treats direction - confirm.

    Parameters
    ----------
    path_to_file : String type
        The path leading to the edge list file

    source_vertex : Long type
        The id of the source vertex

    degree : int type
        The degree of neighbors

    Returns
    -------
    runtime : String type
        The total runtime of the job (loading, shortest path, filtering and
        printing the result)
    """

    start = time.time()

    # Load from the caller-supplied path.
    graph = gl.load_graph(path_to_file, 'snap')

    sssp = gl.shortest_path.create(graph, source_vid=source_vertex)

    # The 'distance' column is filtered with a float value, so the requested
    # degree is converted before comparison.
    sp_sframe = sssp['distance'].filter_by(float(degree), 'distance')

    print("Neighorhood length: {}\nNeighbors:\n{}".format(
        sp_sframe.num_rows(), sp_sframe))

    end = time.time()

    return "Total runtime: {} seconds".format(end - start)
Exemple #3
0
def _get_gl_object_from_persistent_id(type_tag, gl_archive_abs_path):
    """
    Internal util to get a GLC object from a persistent ID in the pickle file.

    Parameters
    ----------
    type_tag : The name of the glc class as saved in the GLC pickler.

    gl_archive_abs_path: An absolute path to the GLC archive where the
                          object was saved.

    Returns
    ----------
    The GLC object.

    Raises
    ----------
    _pickle.UnpicklingError
        If type_tag is not one of "SFrame", "SGraph", "SArray" or "Model".

    """
    if type_tag == "SFrame":
        return _gl.SFrame(gl_archive_abs_path)
    if type_tag == "SGraph":
        return _gl.load_graph(gl_archive_abs_path)
    if type_tag == "SArray":
        return _gl.SArray(gl_archive_abs_path)
    if type_tag == "Model":
        return _gl.load_model(gl_archive_abs_path)
    # Report the offending tag so a corrupt or foreign archive is easy to
    # diagnose.
    raise _pickle.UnpicklingError(
        "GraphLab pickling Error: Unsupported object."
        " Only SFrames, SGraphs, SArrays, and Models are supported."
        " (got type tag %r)" % (type_tag,))
Exemple #4
0
def run_conn_comp_job(path_to_file):
    """Finds the connected components on the specified graph using graphlab's API.

    The model summary is printed after timing stops.

    Parameters
    ----------
    path_to_file : String type
        The path leading to the edge list file

    Returns
    -------
    runtime : String type
        The total runtime of the job (graph loading plus connected components)
    """

    start = time.time()

    # Load from the caller-supplied path.
    g = gl.load_graph(path_to_file, 'snap')

    conn_comp = gl.connected_components.create(g)

    end = time.time()

    # Printing is deliberately left outside the timed region.
    print(conn_comp.summary())

    return "Total runtime: {} seconds".format(end - start)
def run_triangle_counting_job(path_to_file):
    """Calculates the number of triangles on the specified graph using graphlab's API.

    The model summary is printed after timing stops.

    Parameters
    ----------
    path_to_file : String type
        The path leading to the edge list file

    Returns
    -------
    runtime : String type
        The total runtime of the job (graph loading plus triangle counting)
    """

    start = time.time()

    # Load from the caller-supplied path.
    g = gl.load_graph(path_to_file, 'snap')

    tri = gl.triangle_counting.create(g)

    end = time.time()

    # Printing is deliberately left outside the timed region.
    print(tri.summary())

    return "Total runtime: {} seconds".format(end - start)
Exemple #6
0
def _get_gl_object_from_persistent_id(type_tag, gl_archive_abs_path):
    """
    Internal util to get a GLC object from a persistent ID in the pickle file.

    Parameters
    ----------
    type_tag : The name of the glc class as saved in the GLC pickler.

    gl_archive_abs_path: An absolute path to the GLC archive where the
                          object was saved.

    Returns
    ----------
    The GLC object.

    Raises
    ----------
    _pickle.UnpicklingError
        If type_tag is not one of "SFrame", "SGraph", "SArray" or "Model".

    """
    if type_tag == "SFrame":
        obj = _gl.SFrame(gl_archive_abs_path)
    elif type_tag == "SGraph":
        obj = _gl.load_graph(gl_archive_abs_path)
    elif type_tag == "SArray":
        obj = _gl.SArray(gl_archive_abs_path)
    elif type_tag == "Model":
        obj = _gl.load_model(gl_archive_abs_path)
    else:
        # Include the unexpected tag in the message to aid debugging.
        raise _pickle.UnpicklingError(
            "GraphLab pickling Error: Unsupported object."
            " Only SFrames, SGraphs, SArrays, and Models are supported."
            " (got type tag %r)" % (type_tag,))
    return obj
Exemple #7
0
def run_ndegree_neigh_job(path_to_file, source_vertex, degree):
    """Finds the nth degree neighborhood on the specified graph using graphlab's API.

    The neighborhood itself is computed by the ``nth_neighborhood`` helper.

    Parameters
    ----------
    path_to_file : String type
        The path leading to the edge list file
    source_vertex : Long type
        The id of the source vertex

    degree : int type
        The degree of neighbors
    Returns
    -------
    runtime : String type
        The total runtime of the job (loading, neighborhood search and
        printing the result)
    """

    start = time.time()

    # Load from the caller-supplied path.
    g = gl.load_graph(path_to_file, 'snap')

    # Bind the helper's result to the name that the report below prints.
    neighbors = nth_neighborhood(g, source_vertex, degree)
    print("Neighorhood length: {}\nNeighbors:\n{}".format(len(neighbors), neighbors))

    end = time.time()

    return "Total runtime: {} seconds".format(end - start)
def run_sssp_job(path_to_file, source_vertex, target_vertex):
    """Finds the shortest path from a given vertex to a target vertex on the specified graph using graphlab's API.

    Parameters
    ----------
    path_to_file : String type
        The path leading to the edge list file

    source_vertex : Long type
        The id of the source vertex

    target_vertex : Long type
        The id of the target vertex

    Returns
    -------
    runtime : String type
        The total runtime of the job (loading, shortest path, path lookup
        and printing the path)
    """

    start = time.time()

    # Load from the caller-supplied path.
    g = gl.load_graph(path_to_file, 'snap')

    sssp = gl.shortest_path.create(g, source_vid=source_vertex)

    result = sssp.get_path(vid=target_vertex)

    print(result)

    end = time.time()

    return "Total runtime: {} seconds".format(end - start)
Exemple #9
0
 def test_exception(self):
     """Every read, write, save and load against /root/tmp* must raise IOError.

     The unity read/write pair is exercised twice, in the same order as
     before, followed by save and load for graph, sframe and model.
     """
     failing_operations = [
         lambda: glconnect.get_unity().__read__("/root/tmp"),
         lambda: glconnect.get_unity().__write__("/root/tmp", '.....'),
         lambda: glconnect.get_unity().__read__("/root/tmp"),
         lambda: glconnect.get_unity().__write__("/root/tmp", '.....'),
         lambda: self.graph.save("/root/tmp.graph"),
         lambda: self.sframe.save("/root/tmp.frame_idx"),
         lambda: self.model.save("/root/tmp.model"),
         lambda: graphlab.load_graph("/root/tmp.graph"),
         lambda: graphlab.load_sframe("/root/tmp.frame_idx"),
         lambda: graphlab.load_model("/root/tmp.model"),
     ]
     # Operations run lazily, one at a time, so the first unexpected success
     # fails the test immediately.
     for operation in failing_operations:
         self.assertRaises(IOError, operation)
def run_reachability_job(path_to_file, source_vertex, target_vertex, max_depth):
    """Determines whether a target vertex is reachable from a source vertex on the specified graph using graphlab's API.

    The search expands outgoing edges level by level, starting from the
    source vertex, for at most ``max_depth`` levels.

    Parameters
    ----------
    path_to_file : String type
        The path leading to the edge list file

    source_vertex : Long type
        The id of the source vertex

    target_vertex : Long type
        The id of the target vertex

    max_depth : int type
        The maximum number of levels to expand

    Returns
    -------
    runtime : String type
        The total runtime of the job (loading plus search; printing the
        verdict is not timed)
    """

    start = time.time()

    # Load from the caller-supplied path.
    graph = gl.load_graph(path_to_file, 'snap')

    frontier = {source_vertex}  # Start from source vertex - BFS
    is_reachable = False
    remaining_levels = max_depth

    while remaining_levels > 0:
        next_frontier = set()
        for vertex in frontier:
            outgoing_edges = graph.get_edges(src_ids=[vertex])
            next_frontier.update(outgoing_edges["__dst_id"])

        if target_vertex in next_frontier:
            is_reachable = True
            break
        if not next_frontier:
            # Nothing left to expand: further levels cannot find the target.
            break
        frontier = next_frontier
        remaining_levels -= 1

    end = time.time()

    if is_reachable:
        print("Vertex {} is reachable from vertex {}".format(target_vertex, source_vertex))
    else:
        print("Vertex {} cannot be reached from vertex {}".format(target_vertex, source_vertex))

    return "Total runtime: {} seconds".format(end - start)
Exemple #11
0
 def test_exception(self):
     """Check the error type raised by invalid local and remote operations.

     The two ``_test_read_write_helper`` round trips must raise ValueError;
     every ``remote://`` read, write, save and load must raise IOError.
     """
     expected_failures = [
         (ValueError, lambda: self._test_read_write_helper(self.tempfile, 'hello world')),
         (ValueError, lambda: self._test_read_write_helper("local://" + self.tempfile + ".csv.gz", 'hello,world,woof')),
         (IOError, lambda: glconnect.get_unity().__read__("remote:///root/tmp")),
         (IOError, lambda: glconnect.get_unity().__read__("remote:///root/tmp")),
         (IOError, lambda: glconnect.get_unity().__write__("remote:///root/tmp", '.....')),
         (IOError, lambda: self.graph.save("remote:///root/tmp.graph")),
         (IOError, lambda: self.sframe.save("remote:///root/tmp.frame_idx")),
         (IOError, lambda: self.model.save("remote:///root/tmp.model")),
         (IOError, lambda: graphlab.load_graph("remote:///root/tmp.graph")),
         (IOError, lambda: graphlab.load_sframe("remote:///root/tmp.frame_idx")),
         (IOError, lambda: graphlab.load_model("remote:///root/tmp.model")),
     ]
     # Each operation is only invoked inside assertRaises, in listed order.
     for error_type, operation in expected_failures:
         self.assertRaises(error_type, operation)
Exemple #12
0
 def test_exception(self):
     """Every hdfs read, write, save and load below must raise IOError.

     When ``self.has_hdfs`` is false the test only logs a message and passes.
     """
     bad_url = "hdfs:///root/"
     if not self.has_hdfs:
         logging.getLogger(__name__).info("No hdfs avaiable. Test pass.")
         return

     failing_operations = [
         lambda: glconnect.get_unity().__read__("hdfs:///"),
         lambda: glconnect.get_unity().__read__("hdfs:///tmp"),
         lambda: glconnect.get_unity().__read__("hdfs://" + self.tempfile),
         lambda: glconnect.get_unity().__write__(bad_url + "/tmp", "somerandomcontent"),
         lambda: self.graph.save(bad_url + "x.graph"),
         lambda: self.sframe.save(bad_url + "x.frame_idx"),
         lambda: self.model.save(bad_url + "x.model"),
         lambda: graphlab.load_graph(bad_url + "mygraph"),
         lambda: graphlab.load_sframe(bad_url + "x.frame_idx"),
         lambda: graphlab.load_model(bad_url + "x.model"),
     ]
     for operation in failing_operations:
         self.assertRaises(IOError, operation)
def run_reachability_job(path_to_file, source_vertex, target_vertex):
    """Determines whether a target vertex is reachable from a source vertex on the specified graph using graphlab's API.

    Reachability is derived from a single-source shortest path run: the
    target counts as reachable when its reported distance is below 1e+30.

    Parameters
    ----------
    path_to_file : String type
        The path leading to the edge list file

    source_vertex : Long type
        The id of the source vertex

    target_vertex : Long type
        The id of the target vertex

    Returns
    -------
    runtime : String type
        The total runtime of the job (loading plus shortest path; printing
        the verdict is not timed)
    """

    # Distances at or above this value are treated as "no path".
    # NOTE(review): presumably the sentinel shortest_path reports for
    # unreachable vertices - confirm against the graphlab documentation.
    unreachable_distance = 1e+30

    start = time.time()

    # Load from the caller-supplied path.
    graph = gl.load_graph(path_to_file, 'snap')

    sssp = gl.shortest_path.create(graph, source_vid=source_vertex)

    sp_sframe = sssp['distance'].filter_by(target_vertex, '__id')

    distances = list(sp_sframe['distance'])
    # A target id that is not in the graph yields no rows; report it as
    # unreachable instead of failing with IndexError.
    distance = distances[0] if distances else unreachable_distance

    is_reachable = distance < unreachable_distance

    end = time.time()

    if is_reachable:
        print("Vertex {} is reachable from vertex {} - Distance: {}".format(target_vertex, source_vertex, int(distance)))
    else:
        print("Vertex {} cannot be reached from vertex {} - Distance: {}".format(target_vertex, source_vertex, int(distance)))

    return "Total runtime: {} seconds".format(end - start)
Exemple #14
0
 def test_exception(self):
     """Every s3 read, write, save and load below must raise IOError.

     When ``self.has_s3`` is false the test only logs a message and passes.
     """
     if not self.has_s3:
         logging.getLogger(__name__).info("No s3 bucket avaiable. Test pass.")
         return

     bad_bucket = "i_am_a_bad_bucket"
     prefix = "s3://" + bad_bucket
     failing_operations = [
         lambda: glconnect.get_unity().__read__("s3:///"),
         lambda: glconnect.get_unity().__read__("s3://" + self.standard_bucket + "/somerandomfile"),
         lambda: glconnect.get_unity().__read__("s3://" + "/somerandomfile"),
         lambda: glconnect.get_unity().__write__("s3://" + "/somerandomfile", "somerandomcontent"),
         lambda: glconnect.get_unity().__write__("s3://" + self.standard_bucket + "I'amABadUrl/", "somerandomcontent"),
         lambda: self.graph.save(prefix + "/x.graph"),
         lambda: self.sframe.save(prefix + "/x.frame_idx"),
         lambda: self.model.save(prefix + "/x.model"),
         lambda: graphlab.load_graph(prefix + "/x.graph"),
         lambda: graphlab.load_sframe(prefix + "/x.frame_idx"),
         lambda: graphlab.load_model(prefix + "/x.model"),
     ]
     for operation in failing_operations:
         self.assertRaises(IOError, operation)
def run_ndegree_neigh_job(path_to_file, source_vertex, degree):
    """Finds the nth degree neighborhood on the specified graph using graphlab's API.

    Outgoing edges are followed level by level from the source vertex; the
    vertices reached on the ``degree``-th level form the result. A ``degree``
    of zero or less yields an empty neighborhood.

    Parameters
    ----------
    path_to_file : String type
        The path leading to the edge list file

    source_vertex : Long type
        The id of the source vertex

    degree : int type
        The degree of neighbors

    Returns
    -------
    runtime : String type
        The total runtime of the job (loading, search and printing the result)
    """

    start = time.time()

    # Load from the caller-supplied path.
    graph = gl.load_graph(path_to_file, 'snap')

    frontier = {source_vertex}  # Start from source vertex - BFS
    nth_neighbors = set()  # stays empty when degree <= 0
    remaining_levels = degree

    while remaining_levels > 0:
        reached = set()
        for vertex in frontier:
            outgoing_edges = graph.get_edges(src_ids=[vertex])
            reached.update(outgoing_edges["__dst_id"])

        # Compare by value: `is` on integers only works by accident of
        # CPython's small-int caching.
        if remaining_levels == 1:
            nth_neighbors = reached
            break
        frontier = reached
        remaining_levels -= 1

    print("Neighorhood length: {}\nNeighbors:\n{}".format(
        len(nth_neighbors), nth_neighbors))

    end = time.time()

    return "Total runtime: {} seconds".format(end - start)
Exemple #16
0
def _test_save_load_object_helper(testcase, obj, url):
    """
    Helper function to test save and load a server side object to a given url.

    The object is saved under ``url`` plus a type-specific suffix
    (".graph", ".model" or ".frame_idx"), loaded back, and compared with the
    original through ``testcase`` assertions. Raises TypeError when ``obj``
    is not an SGraph, Model or SFrame.
    """
    def cleanup(url):
        """
        Remove the saved file from temp directory.

        Only urls without a protocol, or with the "local" or "remote"
        protocol, are cleaned up.
        """
        protocol = None
        path = None
        splits = url.split("://")
        if len(splits) > 1:
            protocol = splits[0]
            path = splits[1]
        else:
            path = url
        # Compare by value: `is` tests object identity and is not a reliable
        # string comparison.
        if not protocol or protocol in ("local", "remote"):
            tempdir = tempfile.gettempdir()
            # NOTE(review): `path` is used as an unescaped regex and matched
            # against bare file names in the temp directory - confirm that
            # this is the intended matching rule.
            pattern = path + ".*"
            for f in os.listdir(tempdir):
                if re.search(pattern, f):
                    os.remove(os.path.join(tempdir, f))

    if isinstance(obj, graphlab.SGraph):
        obj.save(url + ".graph")
        newobj = graphlab.load_graph(url + ".graph")
        testcase.assertItemsEqual(obj.get_fields(), newobj.get_fields())
        testcase.assertDictEqual(obj.summary(), newobj.summary())
    elif isinstance(obj, graphlab.Model):
        obj.save(url + ".model")
        newobj = graphlab.load_model(url + ".model")
        testcase.assertItemsEqual(obj.list_fields(), newobj.list_fields())
        testcase.assertEqual(type(obj), type(newobj))
    elif isinstance(obj, graphlab.SFrame):
        obj.save(url + ".frame_idx")
        newobj = graphlab.load_sframe(url + ".frame_idx")
        testcase.assertEqual(obj.shape, newobj.shape)
        testcase.assertEqual(obj.column_names(), newobj.column_names())
        testcase.assertEqual(obj.column_types(), newobj.column_types())
        # Compare full contents by materialising every row as a dataframe.
        assert_frame_equal(obj.head(obj.num_rows()).to_dataframe(),
                           newobj.head(newobj.num_rows()).to_dataframe())
    else:
        raise TypeError
    cleanup(url)
Exemple #17
0
def run_reachability_job(path_to_file, source_vertex, target_vertex,
                         max_depth):
    """Determines whether a target vertex is reachable from a source vertex on the specified graph using graphlab's API.

    The reachability check itself is done by the ``is_reachable`` helper.

    Parameters
    ----------
    path_to_file : String type
        The path leading to the edge list file
    source_vertex : Long type
        The id of the source vertex

    target_vertex : Long type
        The id of the target vertex

    max_depth : int type
        The maximum recursion depth
    Returns
    -------
    runtime : String type
        The total runtime of the job (loading plus reachability check;
        printing the verdict is not timed)
    """

    start = time.time()

    # Load from the caller-supplied path.
    g = gl.load_graph(path_to_file, 'snap')

    result = is_reachable(g, source_vertex, target_vertex, max_depth)

    end = time.time()

    # The first placeholder names the vertex being reached (the target) and
    # the second the vertex the search started from (the source).
    if result:
        print("Vertex {} is reachable from vertex {}".format(
            target_vertex, source_vertex))
    else:
        print("Vertex {} cannot be reached from vertex {}".format(
            target_vertex, source_vertex))

    return "Total runtime: {} seconds".format(end - start)
Exemple #18
0
def run_ndegree_neigh_job(path_to_file, source_vertex, degree):
    """Finds the nth degree neighborhood on the specified graph using graphlab's API. This case ignores direction.

    The neighborhood is taken from ``get_neighborhood`` with
    ``radius=degree``; the ids of all vertices in the returned subgraph are
    reported.

    Parameters
    ----------
    path_to_file : String type
        The path leading to the edge list file

    source_vertex : Long type
        The id of the source vertex

    degree : int type
        The degree of neighbors

    Returns
    -------
    runtime : String type
        The total runtime of the job (loading, neighborhood extraction and
        printing the result)
    """

    start = time.time()

    # Load from the caller-supplied path.
    graph = gl.load_graph(path_to_file, 'snap')

    subgraph = graph.get_neighborhood(ids=[source_vertex],
                                      radius=degree,
                                      full_subgraph=False)

    nth_neighbors = set(subgraph.get_vertices()["__id"])

    print("Neighorhood length: {}\nNeighbors:\n{}".format(
        len(nth_neighbors), nth_neighbors))

    end = time.time()

    return "Total runtime: {} seconds".format(end - start)
Exemple #19
0
#!/usr/bin/env python
import graphlab
import sys
import time

# Command-line PageRank benchmark: expects an edge list file, a convergence
# threshold, a damping factor and an iteration cap.
if len(sys.argv) < 5:
    print("Usage: %s <edge list filename> <epsilon> <damping factor> <max iterations>" % sys.argv[0])
    sys.exit(1)

filename = sys.argv[1]
print(filename)

# from https://dato.com/products/create/docs/generated/graphlab.pagerank.create.html
epsilon = float(sys.argv[2])
damping = float(sys.argv[3])
maxIterations = int(sys.argv[4])
g = graphlab.load_graph(filename, format='snap')

# Only the PageRank computation is timed; graph loading is excluded.
start = time.time()
# NOTE(review): the command-line value is described as a damping factor but
# is passed as reset_probability; in the usual PageRank formulation the two
# are complements (reset = 1 - damping) - confirm which value callers supply.
pr = graphlab.pagerank.create(g, reset_probability=damping, max_iterations=maxIterations, _distributed=False, threshold=epsilon)
end = time.time()
pr_out = pr['pagerank']     # SFrame
g.vertices['pagerank'] = pr['graph'].vertices['pagerank']

print(pr_out)
print('time: ' + str(end - start))
    del g.vertices['total_weight']

    # initialize vertex field
    g.vertices['prev_pagerank'] = 1.0
    it = 0
    total_l1_delta = len(g.vertices)
    start = time.time()
    while(total_l1_delta > threshold and it < max_iterations):
        g.vertices['pagerank'] = 0.0
        g = g.triple_apply(pagerank_update_fn, ['pagerank'])
        g.vertices['pagerank'] = g.vertices['pagerank'] * (1 - reset_prob) \
                                                                  + reset_prob
        g.vertices['l1_delta'] = (g.vertices['pagerank'] - \
                          g.vertices['prev_pagerank']).apply(lambda x: abs(x))
        total_l1_delta = g.vertices['l1_delta'].sum()
        g.vertices['prev_pagerank'] = g.vertices['pagerank']
        print 'Iteration %d: total pagerank changed in L1 = %f' % (it,\
                                                                total_l1_delta)
        it = it + 1
    print 'Triple apply pagerank finished in: %f secs' % (time.time() - start)
    del g.vertices['prev_pagerank']
    return g

# Load graph
g = gl.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz', 'snap')
# Give every edge the same weight.
g.edges['weight'] = 1.0

# Run triple apply pagerank
pagerank_graph = pagerank_triple_apply(g)
print(pagerank_graph)
#dijkstra for 100,000 nodes in  graph lab
import graphlab

# NOTE(review): the banner says 100,000 nodes, but the file loaded below is
# the com-youtube graph - confirm the label.
print("-----Nodes: 100,000-----")
g = graphlab.load_graph('http://snap.stanford.edu/data/bigdata/communities/com-youtube.ungraph.txt.gz', format='snap')
# Single-source shortest paths from vertex 1.
sp = graphlab.shortest_path.create(g, source_vid=1)
sp_sframe = sp['distance']
print("---------------------")
# Show up to 100 rows and 3 columns of the distance table.
sp_sframe.print_rows(100, 3)
print("----above are the first 100 computed vertices-----")
# pagerank for the 1000-node dataset in graph lab
import graphlab

print("-----1000 node Dataset-----")
g = graphlab.load_graph("../dataset/pr_1000.txt", format="snap")
# PageRank with the library's default settings.
pr = graphlab.pagerank.create(g)
pr_out = pr["pagerank"]
print("---------------------")
# Show up to 100 rows and 3 columns of the pagerank table.
pr_out.print_rows(100, 3)
print("----above are the first 100 computed vertices-----")
Exemple #23
0
 def setUp(self):
     """Load the p2p-Gnutella04 edge list from the dataset server into self.graph."""
     self.graph = gl.load_graph(dataset_server + "p2p-Gnutella04.txt.gz",
                                format='snap')
Exemple #24
0
#coding:utf-8
__author__ = 'zlj'
import sys

# Python 2 only: sys.setdefaultencoding is not available on the sys module
# after interpreter start-up, so the module is reloaded to get it back before
# switching the process-wide default string encoding to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

import graphlab as gl

# NOTE(review): load_graph is called without a filename here; this looks like
# an unfinished placeholder - confirm which graph should be loaded.
gl.load_graph()
Exemple #25
0
# Result containers, one per data split. Each dict is keyed by the
# regularization parameter (lambda) and holds a list of RMSE values, e.g.
#   rmse_train[0.1] = [r1, r2, r3, ...]
# where r1 is the RMSE after 1 pass over the data, r2 after 2 passes, etc.
rmse_train = {}  # training data
rmse_val = {}  # validation data
rmse_test = {}  # test data
lambs = [0, 0.001, 0.01, 0.1, 1]

# You should not have to edit any of the code below, except to plot figures.
for l in lambs:
    g = graphlab.load_graph('data/training_graph.sgraph')
    n, m = M.shape
    # Both factors start as all-ones matrices with one extra row / column.
    L = np.ones((n + 1, k))
    R = np.ones((k, m + 1))
    lambda_u = lambda_v = l
    rmse_train[l] = []
    rmse_val[l] = []
    rmse_test[l] = []

    # Get the initial rmse, before we do anything. Only the nonzero entries
    # of each matrix take part in the error.
    train_idx = M.nonzero()
    train_residual = M[train_idx] - L.dot(R)[train_idx]
    rmse = np.sqrt(sum(train_residual**2) / len(M[train_idx]))
    rmse_train[l].append(rmse)

    val_idx = val.nonzero()
    val_residual = val[val_idx] - L.dot(R)[val_idx]
    rmse = np.sqrt(sum(val_residual**2) / len(val[val_idx]))
    rmse_val[l].append(rmse)
#pagerank for 1 million nodes in  graph lab
import graphlab
print("-----Youtube Dataset-----")
print("-----Nodes: 1134890 Edges: 2987624-----")
g = graphlab.load_graph('http://snap.stanford.edu/data/bigdata/communities/com-youtube.ungraph.txt.gz', format='snap')
# PageRank with the library's default settings.
pr = graphlab.pagerank.create(g)
pr_out = pr['pagerank']
print("---------------------")
# Show up to 100 rows and 3 columns of the pagerank table.
pr_out.print_rows(100, 3)
print("----above are the first 100 computed vertices-----")
        print 'Iteration %d: total pagerank changed in L1 = %f' % (it, \
                                                                   total_l1_delta)
        it = it + 1
    print 'Triple apply pagerank finished in: %f secs' % (time.time() - start)
    del g.vertices['prev_pagerank']
    return g


if __name__ == '__main__':
    # Command-line entry point: run triple-apply PageRank on the web-Google
    # graph and write the 100 highest-ranked vertices to a text file.
    parser = argparse.ArgumentParser(
        description='Run triple-apply PageRank on the SNAP web-Google graph.')
    # A bare flag (no value) falls back to the same value as the default
    # instead of the boolean True.
    parser.add_argument("--threshold", type=float, nargs='?',
                        const=1e-3, default=1e-3,
                        help="threshold")
    # The iteration cap is a count, so it is parsed as an integer.
    parser.add_argument("--max_iteration", type=int, nargs='?',
                        const=20, default=20,
                        help="max iterations")
    args = parser.parse_args()
    threshold = args.threshold
    max_iteration = args.max_iteration

    print("Start pagerank with threshold=%s, max_iteration=%s" % (str(threshold), str(max_iteration)))
    g = gl.load_graph('https://snap.stanford.edu/data/web-Google.txt.gz', 'snap')
    g.edges['weight'] = 1.0

    pagerank_graph = pagerank_triple_apply(g, threshold=threshold, max_iterations=max_iteration)

    output_file = './result_%s_%s.txt' % (str(threshold), str(max_iteration))
    with open(output_file, 'w') as f:
        # Highest pagerank first; named so the builtin `sorted` stays usable.
        ranked_vertices = pagerank_graph.vertices.sort('pagerank', ascending=False)
        ranked_vertices.print_rows(100, output_file=f)
#dijkstra for 1000 nodes in  graph lab
import graphlab

print("-----Nodes: 1000-----")
g = graphlab.load_graph('../dataset/pr_1000.txt', format='snap')
# Single-source shortest paths from vertex 1.
sp = graphlab.shortest_path.create(g, source_vid=1)
sp_sframe = sp['distance']
print("---------------------")
# Show up to 100 rows and 3 columns of the distance table.
sp_sframe.print_rows(100, 3)
print("----above are the first 100 computed vertices-----")
Exemple #29
0
 def setUp(self):
     """Fetch the p2p-Gnutella04 SNAP edge list and keep the graph on the test case."""
     gnutella_url = "".join([dataset_server, "p2p-Gnutella04.txt.gz"])
     self.graph = gl.load_graph(gnutella_url, format='snap')
#dijkstra for 36000 nodes in  graph lab
import graphlab

print("-----Nodes: 36692 Edges: 367662-----")
g = graphlab.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz',
                        format='snap')
# Single-source shortest paths from vertex 1.
sp = graphlab.shortest_path.create(g, source_vid=1)
sp_sframe = sp['distance']
print("---------------------")
# Show up to 100 rows and 3 columns of the distance table.
sp_sframe.print_rows(100, 3)
print("----above are the first 100 computed vertices-----")
Exemple #31
0
#!/usr/bin/env python
import graphlab
import sys
import time

# Command-line PageRank benchmark: expects an edge list file, a convergence
# threshold, a damping factor and an iteration cap.
if len(sys.argv) < 5:
    print("Usage: %s <edge list filename> <epsilon> <damping factor> <max iterations>"
          % sys.argv[0])
    sys.exit(1)

filename = sys.argv[1]
print(filename)

# from https://dato.com/products/create/docs/generated/graphlab.pagerank.create.html
epsilon = float(sys.argv[2])
damping = float(sys.argv[3])
maxIterations = int(sys.argv[4])
g = graphlab.load_graph(filename, format='snap')

# Only the PageRank computation is timed; graph loading is excluded.
start = time.time()
# NOTE(review): the command-line value is described as a damping factor but
# is passed as reset_probability; in the usual PageRank formulation the two
# are complements (reset = 1 - damping) - confirm which value callers supply.
pr = graphlab.pagerank.create(g,
                              reset_probability=damping,
                              max_iterations=maxIterations,
                              _distributed=False,
                              threshold=epsilon)
end = time.time()
pr_out = pr['pagerank']  # SFrame
g.vertices['pagerank'] = pr['graph'].vertices['pagerank']

print(pr_out)
print('time: ' + str(end - start))
#dijkstra for 1000 nodes in  graph lab
import graphlab

print("-----Nodes: 1000-----")
g = graphlab.load_graph('../dataset/pr_1000.txt', format='snap')
# Single-source shortest paths from vertex 1.
sp = graphlab.shortest_path.create(g, source_vid=1)
sp_sframe = sp['distance']
print("---------------------")
# Show up to 100 rows and 3 columns of the distance table.
sp_sframe.print_rows(100, 3)
print("----above are the first 100 computed vertices-----")
#dijkstra for 36000 nodes in  graph lab
import graphlab

print("-----Nodes: 36692 Edges: 367662-----")
g = graphlab.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz', format='snap')
# Single-source shortest paths from vertex 1.
sp = graphlab.shortest_path.create(g, source_vid=1)
sp_sframe = sp['distance']
print("---------------------")
# Show up to 100 rows and 3 columns of the distance table.
sp_sframe.print_rows(100, 3)
print("----above are the first 100 computed vertices-----")