def pagerank(graph, damping_factor=0.85, max_iterations=100, min_delta=0.00001):
    """Compute the PageRank of every node in a directed graph.

    @param graph: Digraph exposing nodes(), incidents(node) and neighbors(node).
    @param damping_factor: PageRank damping factor.
    @param max_iterations: Maximum number of iterations.
    @param min_delta: Smallest total variation required to run another iteration.
    @return: Dict mapping each node to its PageRank.
    """
    nodes = graph.nodes()
    graph_size = len(nodes)
    if graph_size == 0:
        return {}
    # Baseline value for nodes without inbound links.
    min_value = (1.0 - damping_factor) / graph_size
    # Initialize the PageRank dict with 1/N for all nodes.
    # (BUG FIX: the original passed the literal "a" instead of `nodes`/`node`
    # throughout, so every node's rank was read from and written to the single
    # key "a".)
    pagerank = dict.fromkeys(nodes, 1.0 / graph_size)
    for _ in range(max_iterations):
        diff = 0  # total difference compared to the last iteration
        # Compute each node's PageRank based on its inbound links.
        for node in nodes:
            rank = min_value
            for referring_page in graph.incidents(node):
                rank += (damping_factor * pagerank[referring_page]
                         / len(graph.neighbors(referring_page)))
            diff += abs(pagerank[node] - rank)
            pagerank[node] = rank
        # Stop if PageRank has converged.
        if diff < min_delta:
            break
    return pagerank
def bfs_fixpop(top_node, visit, graph):
    """Graph traversal variant that uses pop() instead of pop(0)."""
    seen = set()
    pending = [top_node]
    while pending:
        current = pending.pop()
        visit(current)
        seen.add(current)
        # Add children that are neither visited nor already pending.
        for child in graph.neighbors(current):
            if child not in seen and child not in pending:
                pending.append(child)
def bfs_orig(top_node, visit, graph):
    """Breadth-first search on a graph, starting at top_node."""
    seen = set()
    frontier = [top_node]
    while frontier:
        current = frontier.pop(0)
        visit(current)
        seen.add(current)
        # Enqueue children that are neither visited nor already queued.
        for child in graph.neighbors(current):
            if child not in seen and child not in frontier:
                frontier.append(child)
def bfs_markfirst(top_node, visit, graph):
    """Breadth-first traversal that marks nodes as visited when enqueued."""
    seen = set()
    pending = collections.deque([top_node])
    while pending:
        current = pending.popleft()
        visit(current)
        seen.add(current)
        # Mark children on enqueue so each node is queued at most once.
        for neighbor in graph.neighbors(current):
            if neighbor not in seen:
                seen.add(neighbor)
                pending.append(neighbor)
def pagerank(graph, damping_factor=0.85, max_iterations=100, min_delta=0.00001):
    """
    Compute and return the PageRank in an directed graph.

    @type  graph: digraph
    @param graph: Digraph.

    @type  damping_factor: number
    @param damping_factor: PageRank dumping factor.

    @type  max_iterations: number
    @param max_iterations: Maximum number of iterations.

    @type  min_delta: number
    @param min_delta: Smallest variation required to have a new iteration.

    @rtype:  Dict
    @return: Dict containing all the nodes PageRank.
    """
    nodes = graph.nodes()
    graph_size = len(nodes)
    if graph_size == 0:
        # BUG FIX: the original assigned `graph = gr` here (NameError on an
        # empty graph) with the intended early return commented out.
        return {}
    # Value for nodes without inbound links.
    min_value = (1.0 - damping_factor) / graph_size
    # Initialize the PageRank dict with 1/N for all nodes.
    pagerank = dict.fromkeys(nodes, 1.0 / graph_size)
    for _ in range(max_iterations):
        diff = 0  # total difference compared to the last iteration
        # Compute each node's PageRank based on its inbound links.
        for node in nodes:
            rank = min_value
            for referring_page in graph.incidents(node):
                rank += (damping_factor * pagerank[referring_page]
                         / len(graph.neighbors(referring_page)))
            diff += abs(pagerank[node] - rank)
            pagerank[node] = rank
        # Stop if PageRank has converged.
        if diff < min_delta:
            break
    return pagerank
def bfs_deque(top_node, visit, graph):
    """BFS using collections.deque, whose popleft() is O(1)."""
    seen = set()
    pending = collections.deque([top_node])
    while pending:
        current = pending.popleft()
        visit(current)
        seen.add(current)
        # Enqueue children that are neither visited nor already queued.
        for child in graph.neighbors(current):
            if child not in seen and child not in pending:
                pending.append(child)