def test_in_place(self):
    """Tests for an in-place reweighting of the edges of the graph."""
    G = nx.DiGraph()
    G.add_edge(0, 1, weight=1)
    G.add_edge(0, 2, weight=1)
    nx.stochastic_graph(G, copy=False)
    assert_equal(sorted(G.edges(data=True)),
                 [(0, 1, {'weight': 0.5}), (0, 2, {'weight': 0.5})])
def test_in_place(self):
    """Tests for an in-place reweighting of the edges of the graph."""
    G = nx.DiGraph()
    G.add_edge(0, 1, weight=1)
    G.add_edge(0, 2, weight=1)
    nx.stochastic_graph(G, copy=False)
    assert sorted(G.edges(data=True)) == [
        (0, 1, {"weight": 0.5}),
        (0, 2, {"weight": 0.5}),
    ]
def add_noise(G, noise=1e-13):
    # Add noise to the largest weights in the graph.
    # NOTE: this method is used to handle the eigs() RuntimeError:
    # "Factor is exactly singular".
    max_weight = max(e[2]['weight'] for e in G.edges_iter(data=True))
    for e in G.edges_iter(data=True):
        if e[2]['weight'] == max_weight:
            e[2]['weight'] += noise
    if not gm.check_if_stochastic_matrix(nx.to_numpy_matrix(G)):
        nx.stochastic_graph(G, copy=False)
    return G
def test_stochastic():
    G = nx.DiGraph()
    G.add_edge(0, 1)
    G.add_edge(0, 2)
    S = nx.stochastic_graph(G)
    assert_true(nx.is_isomorphic(G, S))
    assert_equal(sorted(S.edges(data=True)),
                 [(0, 1, {'weight': 0.5}), (0, 2, {'weight': 0.5})])
    S = nx.stochastic_graph(G, copy=True)
    assert_equal(sorted(S.edges(data=True)),
                 [(0, 1, {'weight': 0.5}), (0, 2, {'weight': 0.5})])
def update_bipartite_graph(self, graph, import_export_data):
    new_graph = nx.DiGraph()
    # use nodes in the original graph
    new_graph.add_nodes_from(graph.nodes(data=True))
    # add new country - product edges
    for country, records in import_export_data.iteritems():
        if country not in self.country_list:
            continue
        for each_record in records:
            if each_record['id'] not in self.product_list:
                continue
            if each_record['import'] != 0:
                # import edge
                new_graph.add_edge(self.country_ids[country],
                                   self.product_ids[each_record['id']],
                                   weight=each_record['import'])
            if each_record['export'] != 0:
                # export edge
                new_graph.add_edge(self.product_ids[each_record['id']],
                                   self.country_ids[country],
                                   weight=each_record['export'])
    # create a copy in (right) stochastic form
    W = nx.stochastic_graph(new_graph, weight='weight')
    return W
def test_multidigraph(self):
    G = nx.MultiDiGraph()
    G.add_edges_from([(0, 1), (0, 1), (0, 2), (0, 2)])
    S = nx.stochastic_graph(G)
    d = dict(weight=0.25)
    assert sorted(S.edges(data=True)) == [(0, 1, d), (0, 1, d), (0, 2, d), (0, 2, d)]
def pagerank(G, damping=0.85, max_iterations=1000, tolerance=10e-6) -> dict:
    if len(G) == 0:
        return {}
    # A right-stochastic graph is a weighted digraph in which, for each node,
    # the sum of the weights of all the out-edges of that node is 1.
    # (From the NetworkX documentation.)
    stoch = nx.stochastic_graph(G)
    n = stoch.number_of_nodes()
    ranks = dict.fromkeys(stoch, 1.0 / n)
    dead_ends = [node for node in stoch if stoch.out_degree(node) == 0.0]
    for _ in range(max_iterations):
        previous_x = ranks
        ranks = dict.fromkeys(previous_x.keys(), 0)
        dead_end_sum = damping * sum(previous_x[node] for node in dead_ends)
        for node in ranks:
            for outgoing_node in stoch[node]:
                ranks[outgoing_node] += damping * previous_x[node] * stoch[node][outgoing_node]['weight']
            ranks[node] += dead_end_sum * (1.0 / n) + (1.0 - damping) * (1.0 / n)
        # Check if the difference is within tolerance
        if sum([abs(ranks[node] - previous_x[node]) for node in ranks]) < n * tolerance:
            return ranks
    raise BaseException
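# A minimal usage sketch for the pagerank() variant above; it is not part of the
# original source. The small weighted DiGraph and the tolerance below are
# hypothetical illustration values, and the built-in nx.pagerank() call is only
# used as a rough reference for comparison.
import networkx as nx

G_demo = nx.DiGraph()
G_demo.add_weighted_edges_from([(1, 2, 1.0), (2, 3, 2.0), (3, 1, 1.0), (1, 3, 1.0)])
ranks_demo = pagerank(G_demo, damping=0.85, max_iterations=1000, tolerance=1e-6)
print(sorted(ranks_demo.items()))
print(sorted(nx.pagerank(G_demo, alpha=0.85).items()))  # reference values, should be close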
def pagerank_edgetypes_indirect(D, edgetype_scale, indirect_nodes, max_iter=100, tol=1.0e-6, weight='weight'):
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()
    direct_nodes = [a for a in W if a not in indirect_nodes]
    x = dict.fromkeys(direct_nodes, 1.0 / len(direct_nodes))
    p = dict.fromkeys(direct_nodes, 1.0 / len(direct_nodes))
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0.0)
        weight_to_distribute = sum([(xlast[n] * W[n][nbr]['weight'] * edgetype_scale[W[n][nbr]['type']])
                                    for n in x for nbr in W[n]])
        undistributed_weight = 1 - weight_to_distribute
        for n in x:
            for nbr in W[n]:
                if nbr in indirect_nodes:
                    contribution = xlast[n] * W[n][nbr][weight] * edgetype_scale[W[n][nbr]['type']] / len(W[nbr])
                    for nbr_adj in W[nbr]:
                        x[nbr_adj] += contribution
                else:
                    x[nbr] += xlast[n] * W[n][nbr][weight] * edgetype_scale[W[n][nbr]['type']]
            x[n] += undistributed_weight * p.get(n, 0)
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
def page_rank(G, weight='weight'):
    max_iter = 100
    d = 0.85
    tol = 1.0e-6
    if len(G) == 0:
        return {}
    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G
    W = nx.stochastic_graph(D, weight='weight')
    N = G.number_of_nodes()
    rank = dict.fromkeys(W, 1.0 / N)
    pvector = dict.fromkeys(W, 1.0 / N)
    dweights = pvector
    dnodes = [n for n in W if W.out_degree(n, weight='weight') == 0.0]
    for _ in range(max_iter):
        last = rank
        rank = dict.fromkeys(last.keys(), 0)
        dsum = d * sum(last[n] for n in dnodes)
        for n in rank:
            for nbr in W[n]:
                rank[nbr] += d * last[n] * W[n][nbr][weight]
            rank[n] += dsum * dweights[n] + (1.0 - d) * pvector[n]
        err = sum([abs(rank[n] - last[n]) for n in rank])
        if err < N * tol:
            return rank
def create_transition_matrix(self):
    """Creates ride_count_matrix and transition_matrix."""
    # Create a networkx graph
    count_df = (self.df[['pickup_bin', 'dropoff_bin', 'weight']]
                .groupby(by=['pickup_bin', 'dropoff_bin']).sum().reset_index())
    G = nx.from_pandas_edgelist(df=count_df,
                                source='pickup_bin',
                                target='dropoff_bin',
                                edge_attr=['weight'],
                                create_using=nx.DiGraph(attr='weight'))
    # Create ride_count_matrix
    ride_count_matrix = nx.to_numpy_matrix(G, nodelist=self.hex_bins, weight='weight')
    self.ride_count_matrix = np.squeeze(np.asarray(ride_count_matrix))
    # Create transition matrix
    G = nx.stochastic_graph(G, weight='weight')
    transition_matrix = nx.to_numpy_matrix(G, nodelist=self.hex_bins, weight='weight')
    transition_matrix = np.squeeze(np.asarray(transition_matrix))
    # Remove 0 values
    transition_matrix[transition_matrix == 0] = 0.001
    transition_matrix = (transition_matrix / transition_matrix.sum(axis=1)[:, None])
    self.transition_matrix = transition_matrix
def rpr_matrix(graph, alpha=0.85):
    D = graph.to_directed()
    H = nx.stochastic_graph(D)
    H = nx.to_numpy_matrix(H).transpose()
    I = np.eye(H.shape[0])
    S = alpha * np.linalg.inv(I - (1 - alpha) * H)
    return S
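# A minimal usage sketch for rpr_matrix() above; it is not part of the original
# source. Assumptions: a NetworkX version that still provides to_numpy_matrix
# (pre-3.0, matching the snippet), and the reading that column j of the returned
# matrix S is the rooted-PageRank vector for the node at position j of
# graph.nodes(). Treat both as illustrative, not as the author's documented API.
import networkx as nx
import numpy as np

g_demo = nx.path_graph(4)              # hypothetical small graph
S = rpr_matrix(g_demo, alpha=0.85)
nodes = list(g_demo.nodes())
root_idx = nodes.index(0)              # root the walk at node 0
rpr_scores = dict(zip(nodes, np.asarray(S[:, root_idx]).ravel()))
print(rpr_scores)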
def rank(G, beta=0.85, max_iter=100, tol=1.0e-6, weight='weight'):
    if len(G) == 0:
        return {}
    M = nx.stochastic_graph(G)
    N = M.number_of_nodes()
    C = nx.to_numpy_matrix(M)
    print C
    v = dict.fromkeys(M, 1.0 / N)
    t = dict.fromkeys(M, 1.0 / N)
    # print matrix
    print v
    # power iteration: make up to max_iter iterations
    iter = 0
    for _ in range(max_iter):
        iter += 1
        vlast = v
        v = dict.fromkeys(vlast.keys(), 0)
        for n in v:
            for nbr in M[n]:
                v[nbr] += beta * vlast[n] * M[n][nbr][weight]
            v[n] += (1.0 - beta) * t.get(n, 0)
        # check convergence, l1 norm
        err = sum([abs(v[n] - vlast[n]) for n in v])
        if err < N * tol:
            break
    print v
    print iter
def pagerank(G, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-6, nstart=None, weight='weight', dangling=None): if len(G) == 0: return {} if not G.is_directed(): D = G.to_directed() else: D = G W = net.stochastic_graph(D, weight=weight) N = W.number_of_nodes() if nstart is None: x = dict.fromkeys(W, 1.0 / N) else: s = float(sum(nstart.values())) x = dict((k, v / s) for k, v in nstart.items()) if personalization is None: p = dict.fromkeys(W, 1.0 / N) else: missing = set(G) - set(personalization) if missing: raise NetworkXError('Personalization dictionary ' 'must have a value for every node. ' 'Missing nodes %s' % missing) s = float(sum(personalization.values())) p = dict((k, v / s) for k, v in personalization.items()) if dangling is None: dangling_weights = p #print(dangling_weights) else: missing = set(G) - set(dangling) if missing: raise NetworkXError('Dangling node dictionary ' 'must have a value for every node. ' 'Missing nodes %s' % missing) s = float(sum(dangling.values())) dangling_weights = dict((k, v/s) for k, v in dangling.items()) dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0] for _ in range(max_iter): xlast = x x = dict.fromkeys(xlast.keys(), 0) danglesum = alpha * sum(xlast[n] for n in dangling_nodes) for n in x: for nbr in W[n]: x[nbr] += alpha * xlast[n] * W[n][nbr][weight] x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n] err = sum([abs(x[n] - xlast[n]) for n in x]) if err < N*tol: return x raise NetworkXError('pagerank: power iteration failed to converge ' 'in %d iterations.' % max_iter)
def test_pagerank_algo(self):
    W = nx.stochastic_graph(self.D, weight='weight')
    prs = pagerank_unrecorded(W, personalization=self._personalization, weight='weight')
    for key, val in prs.iteritems():
        self.assertEqual(val, self.result_pr[key])
def __init__(self, G, alpha=0.85, weight='weight', debug=False):
    if not isinstance(G, nx.classes.graph.Graph):
        raise AttributeError('The Graph is not an instance of networkx Graph')
    if isinstance(G, nx.classes.multidigraph.MultiDiGraph):
        raise AttributeError('The MultiDiGraph instance is not supported')
    if not isinstance(G, nx.classes.digraph.DiGraph):
        print('gen directed graph')
        G = self.gen_directed_graph(G, weight=weight)
    self._DEBUG = debug
    self._nodes = len(G)  # count nodes of G here; self._G is only assigned below
    self._alpha = alpha
    weight_tot = 0.0
    # Test if each edge of the graph has a 'weight' attribute
    self._weight_attribut = weight
    for u, v, edata in G.edges(data=True):
        if self._weight_attribut not in edata:
            raise AttributeError('The Graph has a missing weight attribute on edges')
    self._G = nx.stochastic_graph(G, weight=weight)
    # Compute the PageRank for the Graph
    self.init_nodes_in_community()
    self.init_communities()
    self.build_personalization_vector(G)
    self.compute_pagerank(alpha=alpha)
def test_arbitrary_weights(self):
    G = nx.DiGraph()
    G.add_edge(0, 1, weight=1)
    G.add_edge(0, 2, weight=1)
    S = nx.stochastic_graph(G)
    assert_equal(sorted(S.edges(data=True)),
                 [(0, 1, {'weight': 0.5}), (0, 2, {'weight': 0.5})])
def rooted_pagerank(graph, root_node, alpha=0.85, max_iter=100):
    if not graph.is_directed():
        D = graph.to_directed()
    else:
        D = graph
    H = nx.stochastic_graph(D)
    n = len(graph.nodes())
    H = nx.to_numpy_matrix(H).transpose()
    x = np.full((n, 1), 1.0 / n)
    v = np.full((n, 1), 0.0)
    nodes = D.nodes()
    index = 0
    for node in nodes:
        # print(node)
        # print(root_node)
        # print(index)
        if node == root_node:
            # print(1-alpha)
            v[index][0] = 1 - alpha
            break
        index = index + 1
    # print(v)
    for _ in range(max_iter):
        # x = alpha*H*x + (1-alpha)*v
        x = np.add(alpha * np.dot(H, x), v)
    x = np.squeeze(np.asarray(x))
    x = x / np.sum(x)
    return dict(zip(graph.nodes(), x))
def get_mc_attributes(G):
    G = G.to_directed()
    G = nx.stochastic_graph(G)
    tm = nx.to_numpy_matrix(G)
    tm = np.squeeze(np.asarray(tm))
    return (G, tm)
def clculate_pagerank(G, alpha=0.85, max_iter=100, tol=1.0e-6, weight='weight'):
    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G
    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()
    x = dict.fromkeys(W, 1.0 / N)
    p = dict.fromkeys(W, 1.0 / N)
    dangling_weights = p
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]
    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]
        # check convergence, l1 norm
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N * tol:
            return x
def page_rank(self, G):
    d = 0.85
    iterator = 100
    tol = 1.0e-6
    weight = 'weight'
    if len(G) == 0:
        return {}
    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G
    W = nx.stochastic_graph(D, weight='weight')
    N = W.number_of_nodes()
    x = dict.fromkeys(W, 1.0 / N)
    p = dict.fromkeys(W, 1.0 / N)
    for _ in range(iterator):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        for n in x:
            for nbr in W[n]:
                x[nbr] += d * xlast[n] * W[n][nbr][weight]
            x[n] += (1.0 - d) * p[n]
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N * tol:
            return x
def getPageRank(self, D, d=0.85, max_iter=60, tol=1.0e-6, weight='weight'):
    print('BEGIN PAGE RANK CALCULATION')
    if len(D) == 0:
        return {}
    G = nx.stochastic_graph(D, weight=weight)
    N = G.number_of_nodes()
    print('Number of nodes: ' + str(N))
    x = dict.fromkeys(G, 1.0 / N)
    p = dict.fromkeys(G, 1.0 / N)
    dangling_weights = x
    dangling_nodes = [n for n in G if G.out_degree(n, weight=weight) == 0.0]
    for _ in range(max_iter):
        print('Enter iteration ' + str(_))
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = d * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T=xlast^T*W
            for nbr in G[n]:
                x[nbr] += d * xlast[n] * G[n][nbr][weight]
            x[n] += danglesum * dangling_weights[n] + (1.0 - d) * p[n]
        # check convergence, l1 norm
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N * tol:
            print('pagerank: power iteration converged after ' + str(_ + 1) + ' iterations')
            return x
def pagerank(G, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-6, nstart=None, weight='weight', dangling=None): if len(G) == 0: return {} if not G.is_directed(): D = G.to_directed() else: D = G # Create a copy in (right) stochastic form W = nx.stochastic_graph(D, weight=weight) N = W.number_of_nodes() # Choose fixed starting vector if not given if nstart is None: x = dict.fromkeys(W, 1.0 / N) else: # Normalized nstart vector s = float(sum(nstart.values())) x = dict((k, v / s) for k, v in nstart.items()) if personalization is None: # Assign uniform personalization vector if not given p = dict.fromkeys(W, 1.0 / N) else: s = float(sum(personalization.values())) p = dict((k, v / s) for k, v in personalization.items()) if dangling is None: # Use personalization vector if dangling vector not specified dangling_weights = p else: s = float(sum(dangling.values())) dangling_weights = dict((k, v / s) for k, v in dangling.items()) dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0] # power iteration: make up to max_iter iterations for _ in range(max_iter): xlast = x x = dict.fromkeys(xlast.keys(), 0) danglesum = alpha * sum(xlast[n] for n in dangling_nodes) for n in x: # this matrix multiply looks odd because it is # doing a left multiply x^T=xlast^T*W for nbr in W[n]: x[nbr] += alpha * xlast[n] * W[n][nbr][weight] x[n] += danglesum * dangling_weights.get( n, 0) + (1.0 - alpha) * p.get(n, 0) # check convergence, l1 norm err = sum([abs(x[n] - xlast[n]) for n in x]) if err < N * tol: return x raise nx.PowerIterationFailedConvergence(max_iter)
def test_multidigraph(self):
    G = nx.MultiDiGraph()
    G.add_edges_from([(0, 1), (0, 1), (0, 2), (0, 2)])
    S = nx.stochastic_graph(G)
    d = dict(weight=0.25)
    assert_equal(sorted(S.edges(data=True)),
                 [(0, 1, d), (0, 1, d), (0, 2, d), (0, 2, d)])
def test_stochastic_ints():
    G = nx.DiGraph()
    G.add_edge(0, 1, weight=1)
    G.add_edge(0, 2, weight=1)
    S = nx.stochastic_graph(G)
    assert_equal(sorted(S.edges(data=True)),
                 [(0, 1, {'weight': 0.5}), (0, 2, {'weight': 0.5})])
def get_mc_attributes(start_time="2012-04-01 10:00:00", duration=120): # Create csv read iterator data = read_trips_file("hubway_trips_2012.csv") start_time = pd.to_datetime(start_time) end_time = start_time + timedelta(minutes=duration) df = data[(data['start_date'] >= start_time) & (data['end_date'] <= end_time)] stations = read_stations_file("hubway_stations.csv") status = read_status_file("stationstatus_2012_4.csv") status_df = status[status['update'] == start_time] # Remove trips starting or ending in the stations not present in stations dataframe # or stations not present in the status file station_ids = set(stations['id']) status_df = status_df[status_df['station_id'].isin(station_ids)] df = df[(df['strt_statn'].isin(station_ids)) & (df['end_statn'].isin(station_ids))] trips_df = pd.DataFrame( {'weight': df.groupby(['strt_statn', 'end_statn']).size()}) trips_df = trips_df.reset_index() print "Creating networkx graph" G = nx.from_pandas_dataframe(trips_df, 'strt_statn', 'end_statn', 'weight', create_using=nx.DiGraph()) G = nx.stochastic_graph(G, weight='weight') # Add stations that are present in status_ids but not in trips_df status_ids = set(status['station_id']) for node in status_ids - set(G.nodes()): G.add_node(node) print "Creating transition matrix" transition_matrix = nx.to_numpy_matrix(G, weight='weight') transition_matrix = np.squeeze(np.asarray(transition_matrix)) print "Creating object assignment and distribution" object_assignment = {} object_distribution = {} for node in G.nodes(): try: object_assignment[node] = status_df[status_df['station_id'] == node].get('nbBikes').item() except: object_assignment[node] = 0 num_objects = sum(object_assignment.values()) for node in G.nodes(): object_distribution[node] = 1.0 * object_assignment[node] / num_objects return (num_objects, transition_matrix, G, object_distribution)
def test_arbitrary_weights(self):
    G = nx.DiGraph()
    G.add_edge(0, 1, weight=1)
    G.add_edge(0, 2, weight=1)
    S = nx.stochastic_graph(G)
    assert sorted(S.edges(data=True)) == [
        (0, 1, {"weight": 0.5}),
        (0, 2, {"weight": 0.5}),
    ]
def test_default_weights(self):
    G = nx.DiGraph()
    G.add_edge(0, 1)
    G.add_edge(0, 2)
    S = nx.stochastic_graph(G)
    assert nx.is_isomorphic(G, S)
    assert sorted(S.edges(data=True)) == [
        (0, 1, {"weight": 0.5}),
        (0, 2, {"weight": 0.5}),
    ]
def pagerank(Graph, alpha=0.85, personalization=None, max_iteration=50,
             tolerance=1.0e-8, initial_set_rank=None, weight='weight', dangling=None):
    if len(Graph) == 0:
        return {}
    # check whether the graph is directed
    if not Graph.is_directed():
        Directed_Graph = Graph.to_directed()
    else:
        Directed_Graph = Graph
    # convert the graph to a (right) stochastic graph
    stochastic_graph = nx.stochastic_graph(Directed_Graph, weight=weight)
    Number_node = stochastic_graph.number_of_nodes()
    # set the initial rank for each node
    if initial_set_rank is None:
        x = dict.fromkeys(stochastic_graph, 1.0 / Number_node)
    else:
        sum_initial_set_rank = float(sum(initial_set_rank.values()))
        x = dict((key, value / sum_initial_set_rank) for key, value in initial_set_rank.items())
    # create fake (teleport) links between the nodes
    if personalization is None:
        personalization_vector = dict.fromkeys(stochastic_graph, 1.0 / Number_node)
    else:
        sum_personalization_vector = float(sum(personalization.values()))
        personalization_vector = dict((key, value / sum_personalization_vector)
                                      for key, value in personalization.items())
    # set the stochastic value for nodes without outlinks
    if dangling is None:
        dangling_weights = personalization_vector
    else:
        sum_dangling_weights = float(sum(dangling.values()))
        dangling_weights = dict((key, value / sum_dangling_weights)
                                for key, value in dangling.items())
    dangling_nodes = [n for n in stochastic_graph
                      if stochastic_graph.out_degree(n, weight=weight) == 0.0]
    # calculate the rank for the matrix A = αP + (1−α) * 1/n * e*e^T
    for i in range(max_iteration):
        print("iteration {} : the rank of each page is:\n".format(i))
        last_rank = x
        x = dict.fromkeys(last_rank.keys(), 0)
        danglesum = alpha * sum(last_rank[n] for n in dangling_nodes)
        for n in x:
            for nbr in stochastic_graph[n]:
                x[nbr] += alpha * last_rank[n] * stochastic_graph[n][nbr][weight]
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * personalization_vector[n]
        print(x)
        print("\n----------------------------------------------------------")
        err = sum([abs(x[n] - last_rank[n]) for n in x])
        if err < Number_node * tolerance:
            print("The algorithm converged; the final ranks are:\n")
            return x
def pagerank(G, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-6,
             nstart=None, weight="weight", dangling=None):
    # PageRank value of each node
    if len(G) == 0:
        return {}
    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()
    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)
    else:
        s = float(sum(nstart.values()))
        x = {k: v / s for k, v in nstart.items()}
    if personalization is None:
        p = dict.fromkeys(W, 1.0 / N)
    else:
        s = float(sum(personalization.values()))
        p = {k: v / s for k, v in personalization.items()}
    if dangling is None:
        dangling_weights = p
    else:
        s = float(sum(dangling.values()))
        dangling_weights = {k: v / s for k, v in dangling.items()}
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights.get(n, 0) + (1.0 - alpha) * p.get(n, 0)
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
def create_graph(filename):
    ## This function accepts a filename string as a parameter. The file
    ## contains an edge list in the format "node1 node2", which indicates
    ## that the former node points to the latter. Using the read_edgelist
    ## function of the networkx library, we can easily build a directed
    ## graph from the given information. The function returns the directed
    ## graph (converted to right-stochastic form) and its node count.
    g = nx.read_edgelist(filename, create_using=nx.DiGraph())
    n = nx.number_of_nodes(g)
    g = nx.stochastic_graph(g)
    return g, n
def pagerank_iterative(G, d=0.85, max_iter=100, tol=1.0e-6, weight='weight'):
    """PageRank calculated iteratively."""
    # Step 1: Initiate PageRank
    N = G.number_of_nodes()  # N = 11
    node_and_pr = dict.fromkeys(G, 1.0 / N)
    # Step 2: Create a copy in (right) stochastic form
    stochastic_graph = nx.stochastic_graph(G, weight=weight)  # M = 1/L(pj)
    # Step 3: Power iteration: make up to max_iter iterations
    dangling_value = (1 - d) / N
    for _ in range(max_iter):  # for each iteration
        node_and_prev_pr = node_and_pr
        node_and_pr = dict.fromkeys(node_and_prev_pr.keys(), 0)
        for node in node_and_pr:  # for each node
            for out_node in stochastic_graph[node]:  # node --> out_node
                # PR(p_i) = d * PR(p_j)/L(p_j)
                node_and_pr[out_node] += d * node_and_prev_pr[node] * stochastic_graph[node][out_node][weight]
            node_and_pr[node] += dangling_value
        # Plot graph with one iteration
        '''
        out_file = 'wikipedia_pagerank_example_iteration_1.pdf'
        node_size = [pr*30000 for node, pr in node_and_pr.items()]
        node_and_labels = {node: node + '\n' + str(round(pr, 3))
                           for node, pr in node_and_pr.items()}
        plotnxgraph.plot_graph(G, out_file=out_file, node_size=node_size,
                               node_and_labels=node_and_labels)
        return
        '''
        # check convergence, l1 norm
        err = sum([abs(node_and_pr[node] - node_and_prev_pr[node]) for node in node_and_pr])
        if err < N * tol:
            return node_and_pr
    raise NetworkXError('pagerank: power iteration failed to converge '
                        'in {} iterations.'.format(max_iter))
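# A minimal usage sketch for pagerank_iterative() above; it is not part of the
# original source. The four-node graph is hypothetical and only illustrates the
# call signature and the fact that, with no dangling nodes, the returned scores
# sum to roughly 1.
import networkx as nx

demo = nx.DiGraph()
demo.add_weighted_edges_from([('a', 'b', 1.0), ('b', 'c', 1.0), ('c', 'a', 1.0), ('a', 'c', 2.0)])
pr = pagerank_iterative(demo, d=0.85, max_iter=100, tol=1.0e-6)
print(pr)
print(sum(pr.values()))  # should be close to 1 when every node has out-edges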
def get_mc_attributes(start_time="2012-04-01 10:00:00", duration=120): # Create csv read iterator data = read_trips_file("hubway_trips_2012.csv") start_time = pd.to_datetime(start_time) end_time = start_time + timedelta(minutes=duration) df = data[(data['start_date'] >= start_time) & (data['end_date'] <= end_time)] stations = read_stations_file("hubway_stations.csv") status = read_status_file("stationstatus_2012_4.csv") status_df = status[status['update'] == start_time] # Remove trips starting or ending in the stations which are not present # in stations dataframe or stations not present in the status file station_ids = set(stations['id']) status_df = status_df[status_df['station_id'].isin(station_ids)] df = df[(df['strt_statn'].isin(station_ids)) & (df['end_statn'].isin(station_ids))] trips_df = pd.DataFrame( {'weight': df.groupby(['strt_statn', 'end_statn']).size()}) trips_df = trips_df.reset_index() print "Number of trips:{}".format(len(df)) print "Creating networkx graph" G = nx.from_pandas_dataframe(trips_df, 'strt_statn', 'end_statn', 'weight', create_using=nx.DiGraph()) G = nx.stochastic_graph(G, weight='weight') # Add stations that are present in status_ids but not in trips_df status_ids = set(status['station_id']) for node in status_ids - set(G.nodes()): G.add_node(node) print "Creating item distribution" initial_item_distribution = {} for node in G.nodes(): try: initial_item_distribution[node] = status_df[ status_df['station_id'] == node].get('nbBikes').item() except: initial_item_distribution[node] = 0 return G, initial_item_distribution
def pagerank(g, max_iter, alpha, tau):
    sg = nx.stochastic_graph(g)  # stochastic graph
    n_nodes = nx.number_of_nodes(g)
    nodes = g.nodes()
    PI = [1.0 / n_nodes] * n_nodes  # initialization of pagerank
    a = []  # dangling nodes vector
    for n in nodes:
        if g.out_degree(n):
            a.append(1)
        else:
            a.append(0)
    H = nx.adjacency_matrix(sg)
    for i in range(max_iter):
        pi_previous = PI
        # v1 = alpha*(pi_previous^T * H)
        v1 = [0] * n_nodes
        for r in range(n_nodes):
            row = H[r, :].toarray()
            for c in range(n_nodes):
                # left multiply: v1[c] accumulates pi_previous[r] * H[r, c]
                v1[c] += pi_previous[r] * row[0][c]
        v1 = [alpha * v for v in v1]
        # v2 = (alpha*(pi_previous^T * a) + 1 - alpha) * 1/n * e^T
        dang_pi = 0
        for e in range(n_nodes):
            dang_pi += pi_previous[e] * a[e]
        constant = alpha * dang_pi + 1 - alpha
        v2 = [float(constant) / n_nodes] * n_nodes
        # pi = v1 + v2
        for e in range(n_nodes):
            PI[e] = v1[e] + v2[e]
        PI = normalize(PI)
        # check convergence
        delta = 0
        for e in range(n_nodes):
            delta += abs(PI[e] - pi_previous[e])
        if delta < tau * n_nodes:
            return transform_pagerank(PI)
    return transform_pagerank(PI)
def pagerank(G, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-10,
             nstart=None, weight='weight', dangling=None):
    if len(G) == 0:
        return {}
    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G
    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()
    # Choose fixed starting vector if not given
    x = dict.fromkeys(W, 1.0 / N)
    p = dict.fromkeys(W, 1.0 / N)
    dangling_weights = p
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]
    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        print('here')
        print()
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        # print(danglesum)
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T=xlast^T*W
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]
        # check convergence, l1 norm
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N * tol:
            return x
    return x
    raise nx.NetworkXError('pagerank: power iteration failed to converge '
                           'in %d iterations.' % max_iter)
def run_pagerank(self, G, alpha=0.85, pers=None, max_iter=1000, tol=1.0e-6, nstart=None, weight='weight', node_types=None): """Return the PageRank of the nodes in the graph. PageRank computes a ranking of the nodes in the graph G based on the structure of the incoming links. It was originally designed as an algorithm to rank web pages. Parameters ----------- G : graph A NetworkX graph alpha : float, optional Damping parameter for PageRank, default=0.85 pers: dict, optional The "pers vector" consisting of a dictionary with a key for every graph node and nonzero pers value for each node. max_iter : integer, optional Maximum number of iterations in power method eigenvalue solver. tol : float, optional Error tolerance used to check convergence in power method solver. nstart : dictionary, optional Starting value of PageRank iteration for each node. weight : key, optional Edge data key to use as weight. If None weights are set to 1. Returns ------- pagerank : dictionary Dictionary of nodes with PageRank as value Notes ----- The eigenvector calculation is done by the power iteration method and has no guarantee of convergence. The iteration will stop after max_iter iterations or an error tolerance of number_of_nodes(G)*tol has been reached. """ if len(G) == 0: return {} # create a copy in (right) stochastic form W = nx.stochastic_graph(G, weight=weight) scale = 1.0 / W.number_of_nodes() # choose fixed starting vector if not given if nstart is None: x = dict.fromkeys(W, scale) else: x = nstart # normalize starting vector to 1 s = 1.0/sum(x.values()) for k in x: x[k]*=s # assign uniform pers vector if not given if pers is None: pers = dict.fromkeys(W, scale) else: # Normalize the sum to 1 s = sum(pers.values()) for k in pers.keys(): pers[k] /= s if len(pers)!=len(G): raise Exception('Personalization vector must have a value for every node') # "dangling" nodes, no links out from them out_degree = W.out_degree() dangle = [n for n in W if out_degree[n]==0.0] itr = 0 while True: # power iteration: make up to max_iter iterations xlast = x x = dict.fromkeys(xlast.keys(), 0) # "dangling" nodes only consume energies, so we release these energies manually danglesum = alpha*scale*sum(xlast[n] for n in dangle) # danglesum = 0 for n in x: # this matrix multiply looks odd because it is # doing a left multiply x^T=xlast^T*W for nbr in W[n]: x[nbr] += alpha*xlast[n]*W[n][nbr][weight] x[n] += danglesum + (1 - alpha) * pers[n] # normalize vector s = 1.0 / sum(x.values()) for n in x: x[n]*=s # check convergence, l1 norm err = sum([abs(x[n] - xlast[n]) for n in x]) if err < tol: print "converged in %d iterations." % itr break if itr > max_iter: raise Exception('pagerank: power iteration failed to converge ' 'in %d iterations.'%(itr-1)) itr += 1 # Returns: # x: PageRank of each node; # l: Detailed contributions of each layer; # itr: Iterations to converge. return x, itr
def pagerank(G, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-6,
             nstart=None, weight='weight', dangling=None):
    """Return the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
        A NetworkX graph. Undirected graphs will be converted to a directed
        graph with two directed edges for each undirected edge.
    alpha : float, optional
        Damping parameter for PageRank, default=0.85.
    personalization : dict, optional
        The "personalization vector" consisting of a dictionary with a
        key for every graph node and nonzero personalization value for each
        node. By default, a uniform distribution is used.
    max_iter : integer, optional
        Maximum number of iterations in power method eigenvalue solver.
    tol : float, optional
        Error tolerance used to check convergence in power method solver.
    nstart : dictionary, optional
        Starting value of PageRank iteration for each node.
    weight : key, optional
        Edge data key to use as weight. If None weights are set to 1.
    dangling : dict, optional
        The outedges to be assigned to any "dangling" nodes, i.e., nodes
        without any outedges. The dict key is the node the outedge points to
        and the dict value is the weight of that outedge. By default, dangling
        nodes are given outedges according to the personalization vector
        (uniform if not specified). This must be selected to result in an
        irreducible transition matrix (see notes under google_matrix). It may
        be common to have the dangling dict to be the same as the
        personalization dict.

    Returns
    -------
    pagerank : dictionary
        Dictionary of nodes with PageRank as value

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence. The iteration will stop
    after max_iter iterations or an error tolerance of
    number_of_nodes(G)*tol has been reached.

    The PageRank algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs by converting each edge in the
    directed graph to two edges.

    See Also
    --------
    pagerank_numpy, pagerank_scipy, google_matrix

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    if len(G) == 0:
        return {}
    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G
    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()
    # Choose fixed starting vector if not given
    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)
    else:
        # Normalized nstart vector
        s = float(sum(nstart.values()))
        x = dict((k, v / s) for k, v in nstart.items())
    if personalization is None:
        # Assign uniform personalization vector if not given
        p = dict.fromkeys(W, 1.0 / N)
    else:
        missing = set(G) - set(personalization)
        if missing:
            raise NetworkXError('Personalization dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        s = float(sum(personalization.values()))
        p = dict((k, v / s) for k, v in personalization.items())
    if dangling is None:
        # Use personalization vector if dangling vector not specified
        dangling_weights = p
    else:
        missing = set(G) - set(dangling)
        if missing:
            raise NetworkXError('Dangling node dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        s = float(sum(dangling.values()))
        dangling_weights = dict((k, v / s) for k, v in dangling.items())
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]
    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T=xlast^T*W
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]
        # check convergence, l1 norm
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N * tol:
            return x
    raise NetworkXError('pagerank: power iteration failed to converge '
                        'in %d iterations.' % max_iter)
def stochastic(klass, D, weight):
    # create a copy in (right) stochastic form,
    # i.e. a form in which each node's out-edge weights sum to 1
    H = nx.stochastic_graph(D, weight=weight)
    return H
def pagerank(G,alpha=0.85,max_iter=100,tol=1.0e-8,nstart=None): """Return the PageRank of the nodes in the graph. PageRank computes the largest eigenvector of the stochastic adjacency matrix of G. Parameters ----------- G : graph A networkx graph alpha : float, optional Parameter for PageRank, default=0.85 max_iter : interger, optional Maximum number of iterations in power method. tol : float, optional Error tolerance used to check convergence in power method iteration. nstart : dictionary, optional Starting value of PageRank iteration for each node. Returns ------- nodes : dictionary Dictionary of nodes with value as PageRank Examples -------- >>> G=nx.DiGraph(nx.path_graph(4)) >>> pr=nx.pagerank(G,alpha=0.9) Notes ----- The eigenvector calculation is done by the power iteration method and has no guarantee of convergence. The iteration will stop after max_iter iterations or an error tolerance of number_of_nodes(G)*tol has been reached. The PageRank algorithm was designed for directed graphs but this algorithm does not check if the input graph is directed and will execute on undirected graphs. For an overview see: A. Langville and C. Meyer, "A survey of eigenvector methods of web information retrieval." http://citeseer.ist.psu.edu/713792.html """ import networkx if type(G) == networkx.MultiGraph or type(G) == networkx.MultiDiGraph: raise Exception("pagerank() not defined for graphs with multiedges.") # create a copy in (right) stochastic form W=networkx.stochastic_graph(G) # choose fixed starting vector if not given if nstart is None: x=dict.fromkeys(W,1.0/W.number_of_nodes()) else: x=nstart # normalize starting vector to 1 s=1.0/sum(x.values()) for k in x: x[k]*=s nnodes=W.number_of_nodes() # "dangling" nodes, no links out from them out_degree=W.out_degree(with_labels=True) # dangle=[n for n in W if sum(W[n].values())==0.0] dangle=[n for n in W if out_degree[n]==0.0] # pagerank power iteration: make up to max_iter iterations for i in range(max_iter): xlast=x x=dict.fromkeys(xlast.keys(),0) danglesum=alpha/nnodes*sum(xlast[n] for n in dangle) teleportsum=(1.0-alpha)/nnodes*sum(xlast.values()) for n in x: # this matrix multiply looks odd because it is # doing a left multiply x^T=xlast^T*W for nbr in W[n]: x[nbr]+=alpha*xlast[n]*W[n][nbr]['weight'] x[n]+=danglesum+teleportsum # normalize vector to 1 s=1.0/sum(x.values()) for n in x: x[n]*=s # check convergence, l1 norm err=sum([abs(x[n]-xlast[n]) for n in x]) if err < tol: return x raise NetworkXError("pagerank: power iteration failed to converge in %d iterations."%(i+1))
def pagerank(G, alpha=0.85, personalization=None,
             max_iter=100, tol=1.0e-8, nstart=None, weight='weight'):
    """Return the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
        A NetworkX graph
    alpha : float, optional
        Damping parameter for PageRank, default=0.85
    personalization : dict, optional
        The "personalization vector" consisting of a dictionary with a
        key for every graph node and nonzero personalization value for each node.
    max_iter : integer, optional
        Maximum number of iterations in power method eigenvalue solver.
    tol : float, optional
        Error tolerance used to check convergence in power method solver.
    nstart : dictionary, optional
        Starting value of PageRank iteration for each node.
    weight : key, optional
        Edge data key to use as weight. If None weights are set to 1.

    Returns
    -------
    pagerank : dictionary
        Dictionary of nodes with PageRank as value

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence. The iteration will stop
    after max_iter iterations or an error tolerance of
    number_of_nodes(G)*tol has been reached.

    The PageRank algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs by converting each oriented edge in the
    directed graph to two edges.
    """
    if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph:
        raise Exception("pagerank() not defined for graphs with multiedges.")
    if len(G) == 0:
        return {}
    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G
    # create a copy in (right) stochastic form
    # each row of W sums up to one
    W = nx.stochastic_graph(D, weight=weight)
    scale = 1.0 / W.number_of_nodes()
    # choose fixed starting vector if not given
    if nstart is None:
        x = dict.fromkeys(W, scale)
    else:
        x = nstart
        # normalize starting vector to 1
        s = 1.0 / sum(x.values())
        for k in x:
            x[k] *= s
    # assign uniform personalization/teleportation vector if not given
    if personalization is None:
        # teleport
        p = dict.fromkeys(W, scale)
    else:
        # teleport with bias
        p = personalization
        # normalize starting vector to 1
        s = 1.0 / sum(p.values())
        for k in p:
            p[k] *= s
        if set(p) != set(G):
            raise NetworkXError('Personalization vector '
                                'must have a value for every node')
    # "dangling" nodes, no links out from them
    out_degree = W.out_degree()
    dangle = [n for n in W if out_degree[n] == 0.0]
    i = 0
    # real 'tol'
    tol = W.number_of_nodes() * tol
    while True:
        # power iteration: make up to max_iter iterations
        xlast = x
        # x is the vector containing the value of page rank
        x = dict.fromkeys(xlast.keys(), 0)
        # dangle nodes have no out links, so we sum all the rank for these
        # nodes, and then scale it and alpha it for the next step,
        # just like making each dangle node have a pseudo edge to every node in the web
        danglesum = alpha * scale * sum(xlast[n] for n in dangle)
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T=xlast^T*W
            # W is in inlink form
            for nbr in W[n]:  # linear combination of lines
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum + (1.0 - alpha) * p[n]
        # normalize vector
        s = 1.0 / sum(x.values())
        for n in x:
            x[n] *= s
        # check convergence, l1 norm
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < tol:
            # ok
            break
        if i > max_iter:
            raise NetworkXError('pagerank: power iteration failed to converge '
                                'in %d iterations.' % (i - 1))
        i += 1
    return x
def divrank(G, alpha=0.25, d=0.85, personalization=None, max_iter=100, tol=1.0e-6, nstart=None, weight='weight', dangling=None): ''' Returns the DivRank (Diverse Rank) of the nodes in the graph. This code is based on networkx.pagerank. Args: (diff from pagerank) alpha: controls strength of self-link [0.0-1.0] d: the damping factor Reference: Qiaozhu Mei and Jian Guo and Dragomir Radev, DivRank: the Interplay of Prestige and Diversity in Information Networks, http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.174.7982 ''' if len(G) == 0: return {} if not G.is_directed(): D = G.to_directed() else: D = G # Create a copy in (right) stochastic form W = nx.stochastic_graph(D, weight=weight) N = W.number_of_nodes() # self-link (DivRank) for n in W.nodes_iter(): for n_ in W.nodes_iter(): if n != n_ : if n_ in W[n]: W[n][n_][weight] *= alpha else: if n_ not in W[n]: W.add_edge(n, n_) W[n][n_][weight] = 1.0 - alpha # Choose fixed starting vector if not given if nstart is None: x = dict.fromkeys(W, 1.0 / N) else: # Normalized nstart vector s = float(sum(nstart.values())) x = dict((k, v / s) for k, v in nstart.items()) if personalization is None: # Assign uniform personalization vector if not given p = dict.fromkeys(W, 1.0 / N) else: missing = set(G) - set(personalization) if missing: raise NetworkXError('Personalization dictionary ' 'must have a value for every node. ' 'Missing nodes %s' % missing) s = float(sum(personalization.values())) p = dict((k, v / s) for k, v in personalization.items()) if dangling is None: # Use personalization vector if dangling vector not specified dangling_weights = p else: missing = set(G) - set(dangling) if missing: raise NetworkXError('Dangling node dictionary ' 'must have a value for every node. ' 'Missing nodes %s' % missing) s = float(sum(dangling.values())) dangling_weights = dict((k, v/s) for k, v in dangling.items()) dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0] # power iteration: make up to max_iter iterations for _ in range(max_iter): xlast = x x = dict.fromkeys(xlast.keys(), 0) danglesum = d * sum(xlast[n] for n in dangling_nodes) for n in x: D_t = sum(W[n][nbr][weight] * xlast[nbr] for nbr in W[n]) for nbr in W[n]: #x[nbr] += d * xlast[n] * W[n][nbr][weight] x[nbr] += ( d * (W[n][nbr][weight] * xlast[nbr] / D_t) * xlast[n] ) x[n] += danglesum * dangling_weights[n] + (1.0 - d) * p[n] # check convergence, l1 norm err = sum([abs(x[n] - xlast[n]) for n in x]) if err < N*tol: return x raise NetworkXError('divrank: power iteration failed to converge ' 'in %d iterations.' % max_iter)
def test_graph_disallowed(self):
    nx.stochastic_graph(nx.Graph())
def test_multigraph_disallowed(self):
    nx.stochastic_graph(nx.MultiGraph())
def pagerank(G, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-9,
             nstart=None, weight='weight', dangling=None):
    """Return the PageRank of the nodes in the graph.

    Parameters
    ----------
    G : graph
        A NetworkX graph. For the PageRank algorithm this is a directed graph.
    alpha : float, optional
        Damping factor, default 0.85; the "teleporting" coefficient used to
        deal with the spider-trap problem.
    personalization : dict, optional
        Personalization vector that fixes each node's share in the
        redistribution step. Example format for four nodes:
        {1: 0.25, 2: 0.25, 3: 0.25, 4: 0.25}. By default all nodes get equal
        weight; a node may be given a larger share, as long as the weights
        sum to 1.
    max_iter : integer, optional
        Maximum number of iterations.
    tol : float, optional
        Convergence threshold for the iteration.
    nstart : dictionary, optional
        Initial PageRank value for every node in the network.
    weight : key, optional
        Edge data key to use as weight.
    dangling : dict, optional
        Dictionary describing the dangling edges:
        key -- tail node of the dangling edge, i.e. the dangling node;
        value -- weight of that dangling edge.
        How much PR mass is handed to a dangling node is decided by the
        personalization vector. This must be selected to result in an
        irreducible transition matrix (see notes under google_matrix). It may
        be common to have the dangling dict to be the same as the
        personalization dict.

    Notes
    -----
    The eigenvalue calculation uses an iterative method and convergence is not
    guaranteed; if the error has not dropped below the threshold after the
    maximum number of iterations, an error is raised.
    """
    # Step 1: prepare the graph structure ------------------------------------
    if len(G) == 0:
        return {}
    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G
    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()
    # choose the initial PR vector
    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)  # sums to 1
    else:
        # Normalized nstart vector
        s = float(sum(nstart.values()))
        x = dict((k, v / s) for k, v in nstart.items())
    if personalization is None:
        # Assign uniform personalization vector if not given
        p = dict.fromkeys(W, 1.0 / N)
    else:
        missing = set(G) - set(personalization)
        if missing:
            raise NetworkXError('Personalization dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        s = float(sum(personalization.values()))
        p = dict((k, v / s) for k, v in personalization.items())  # normalize
    if dangling is None:
        # Use personalization vector if dangling vector not specified
        dangling_weights = p
    else:
        missing = set(G) - set(dangling)
        if missing:
            raise NetworkXError('Dangling node dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        s = float(sum(dangling.values()))
        dangling_weights = dict((k, v / s) for k, v in dangling.items())
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]
    # dangling_nodes: the dangling nodes
    # danglesum: total PR mass held by the dangling nodes
    # dangling defaults to the personalization vector
    # dangling_weights is built from dangling and decides how the dangling-node
    # mass is redistributed over the whole graph

    # Iterative computation ---------------------------------------------------
    # PR = alpha*(A*PR + dangling allocation) + (1-alpha)*uniform allocation
    # Three parts: A*PR is the mass distributed through the link matrix, the
    # dangling allocation redistributes the PR mass of the dangling nodes, and
    # the (1-alpha) part hands every node an equal share.
    # Loosely speaking, PageRank can be seen as a contest with three allocation
    # mechanisms:
    # 1. A*PR: free allocation among nodes that take part in the link market.
    # 2. dangling: forced allocation for nodes that do not take part in it.
    # 3. uniform allocation: an equal share for everyone, which keeps spider
    #    traps from hoarding all the rank (rank sinks) and in effect also helps
    #    the dangling allocation.
    # From the graph/matrix point of view: the iteration converges to a unique
    # solution when the corresponding directed graph is strongly connected,
    # i.e. every pair of nodes is mutually reachable. In that case the free
    # competition of case 1 alone converges; otherwise spider traps would cause
    # rank-sink problems.
    for _ in range(max_iter):
        print 'itertime:', _
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)  # reset x
        # part 2: total PR mass of the dangling nodes
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            for nbr in W[n]:
                # part 1: distribute node n's PR mass to its neighbours
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
        for n in x:
            # part 3: add node n's share of the dangling mass and of the uniform allocation
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]
        # convergence check
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N * tol:
            return x
    # fall back to the last iterate instead of raising on non-convergence
    return x
def test_stochastic_multigraph_input():
    S = nx.stochastic_graph(nx.MultiGraph())
def pagerank(G, alpha=0.85, personalization=None,
             max_iter=100, tol=1.0e-8, nstart=None, weight='weight'):
    """Return the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
        A NetworkX graph
    alpha : float, optional
        Damping parameter for PageRank, default=0.85
    personalization : dict, optional
        The "personalization vector" consisting of a dictionary with a
        key for every graph node and nonzero personalization value for each node.
    max_iter : integer, optional
        Maximum number of iterations in power method eigenvalue solver.
    tol : float, optional
        Error tolerance used to check convergence in power method solver.
    nstart : dictionary, optional
        Starting value of PageRank iteration for each node.
    weight : key, optional
        Edge data key to use as weight. If None weights are set to 1.

    Returns
    -------
    pagerank : dictionary
        Dictionary of nodes with PageRank as value

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence. The iteration will stop
    after max_iter iterations or an error tolerance of
    number_of_nodes(G)*tol has been reached.

    The PageRank algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs by converting each oriented edge in the
    directed graph to two edges.

    See Also
    --------
    pagerank_numpy, pagerank_scipy, google_matrix

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph:
        raise Exception("pagerank() not defined for graphs with multiedges.")
    if len(G) == 0:
        return {}
    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G
    # create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    scale = 1.0 / W.number_of_nodes()
    # choose fixed starting vector if not given
    if nstart is None:
        x = dict.fromkeys(W, scale)
    else:
        x = nstart
        # normalize starting vector to 1
        s = 1.0 / sum(x.values())
        for k in x:
            x[k] *= s
    # assign uniform personalization/teleportation vector if not given
    if personalization is None:
        p = dict.fromkeys(W, scale)
    else:
        p = personalization
        # normalize starting vector to 1
        s = 1.0 / sum(p.values())
        for k in p:
            p[k] *= s
        if set(p) != set(G):
            raise NetworkXError('Personalization vector '
                                'must have a value for every node')
    # "dangling" nodes, no links out from them
    out_degree = W.out_degree()
    dangle = [n for n in W if out_degree[n] == 0.0]
    i = 0
    while True:
        # power iteration: make up to max_iter iterations
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * scale * sum(xlast[n] for n in dangle)
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T=xlast^T*W
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum + (1.0 - alpha) * p[n]
        # normalize vector
        s = 1.0 / sum(x.values())
        for n in x:
            x[n] *= s
        # check convergence, l1 norm
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < tol:
            break
        if i > max_iter:
            raise NetworkXError('pagerank: power iteration failed to converge '
                                'in %d iterations.' % (i - 1))
        i += 1
    return x
def pagerank(G, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-6, nstart=None, weight='weight', dangling=None): """ Return the PageRank of the nodes in the graph. Source code from http://networkx.readthedocs.io/en/stable/_modules/networkx/algorithms/link_analysis/pagerank_alg.html#pagerank """ if len(G) == 0: return {} if not G.is_directed(): D = G.to_directed() else: D = G # Step 1: Create a copy in (right) stochastic form W = nx.stochastic_graph(D, weight=weight) N = W.number_of_nodes() # N = 11 # Plot the stochastic graph out_file = 'wikipedia_pagerank_example_stochastic_graph.pdf' edge_and_labels = {k : round(v, 2) for k, v in nx.get_edge_attributes(W, 'weight').items()} plot_graph(W, out_file=out_file, edge_and_labels=edge_and_labels) # Step 2: Choose fixed starting vector if not given if nstart is None: x = dict.fromkeys(W, 1.0 / N) else: # Normalized nstart vector s = float(sum(nstart.values())) x = dict((k, v / s) for k, v in nstart.items()) # plot a graph with nstart: starting value of PageRank iteration for each node. out_file = 'wikipedia_pagerank_example_nstart.pdf' node_and_labels = {k : k+'\n'+str(round(v, 2)) for k, v in x.items()} plot_graph(W, out_file=out_file, node_and_labels=node_and_labels) # Step 3: Assign uniform personalization vector if not given if personalization is None: p = dict.fromkeys(W, 1.0 / N) # node and nonzero personalization value for each node else: missing = set(G) - set(personalization) if missing: raise NetworkXError('Personalization dictionary ' 'must have a value for every node. ' 'Missing nodes %s' % missing) s = float(sum(personalization.values())) p = dict((k, v / s) for k, v in personalization.items()) # Step 4: Use personalization vector if dangling vector not specified if dangling is None: dangling_weights = p else: missing = set(G) - set(dangling) if missing: raise NetworkXError('Dangling node dictionary ' 'must have a value for every node. ' 'Missing nodes %s' % missing) s = float(sum(dangling.values())) dangling_weights = dict((k, v/s) for k, v in dangling.items()) dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0] # Step 5: power iteration: make up to max_iter iterations for _ in range(max_iter): xlast = x # pagerank for each node x = dict.fromkeys(xlast.keys(), 0) danglesum = alpha * sum(xlast[n] for n in dangling_nodes) for n in x: # this matrix multiply looks odd because it is # doing a left multiply x^T=xlast^T*W for nbr in W[n]: x[nbr] += alpha * xlast[n] * W[n][nbr][weight] # PR(p_i) = d * PR(p_j)}/L(p_j) x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n] # danglesum/N + (1-d)/N # Plot graph with one iteration ''' out_file = 'wikipedia_pagerank_example_iteration_1.pdf' node_and_pr = x node_size = [pr*30000 for node, pr in node_and_pr.items()] node_and_labels = {node : node+'\n'+str(round(pr, 3)) for node, pr in node_and_pr.items()} plot_graph(G, out_file=out_file, node_size=node_size, node_and_labels=node_and_labels) ''' # check convergence, l1 norm err = sum([abs(x[n] - xlast[n]) for n in x]) if err < N*tol: return x raise NetworkXError('pagerank: power iteration failed to converge in %d iterations.' % max_iter)
def build_bipartite_graph(import_export_data, country_list=[], product_list=[]): """ If country_list and product_list are not empty, then we only care about countries and products in the lists. """ graph = nx.DiGraph() country_ids = {} product_ids = {} next_id = 0 countries = set(import_export_data.keys()) products = set([y['id'] for x in import_export_data.values() for y in x]) filtered_countries = countries & set(country_list) if country_list else countries filtered_products = products & set(product_list) if product_list else products print "loading %s countries and %s products..." % (len(filtered_countries), len(filtered_products)) # add country nodes for country in filtered_countries: graph.add_node(next_id, type="country", entity_id=country ) country_ids[country] = next_id next_id += 1 # add country nodes for product in filtered_products: graph.add_node(next_id, type="product", entity_id=product ) product_ids[product] = next_id next_id += 1 # add country - product edges for country, records in import_export_data.iteritems(): if not country in filtered_countries: continue for each_record in records: if not each_record['id'] in filtered_products: continue if not each_record['import'] == 0: graph.add_edge(country_ids[country], product_ids[each_record['id']], weight=each_record['import']) # import edge if not each_record['export'] == 0: graph.add_edge(product_ids[each_record['id']], country_ids[country], weight=each_record['export']) # export edge # create a copy in (right) stochastic form W = nx.stochastic_graph(graph, weight='weight') return W
def pagerank(G, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-10, nstart=None, weight='weight', dangling=None): """Return the PageRank of the nodes in the graph. G : graph A NetworkX graph. Undirected graphs will be converted to a directed graph with two directed edges for each undirected edge. alpha : float, optional Damping parameter for PageRank, default=0.85. personalization: dict, optional The "personalization vector" consisting of a dictionary with a key for every graph node and nonzero personalization value for each node. By default, a uniform distribution is used. max_iter : integer, optional Maximum number of iterations in power method eigenvalue solver. tol : float, optional Error tolerance used to check convergence in power method solver. nstart : dictionary, optional Starting value of PageRank iteration for each node. weight : key, optional Edge data key to use as weight. If None weights are set to 1. dangling: dict, optional The outedges to be assigned to any "dangling" nodes, i.e., nodes without any outedges. The dict key is the node the outedge points to and the dict value is the weight of that outedge. By default, dangling nodes are given outedges according to the personalization vector (uniform if not specified). This must be selected to result in an irreducible transition matrix (see notes under google_matrix). It may be common to have the dangling dict to be the same as the personalization dict. """ if len(G) == 0: return {} if not G.is_directed(): D = G.to_directed() else: D = G # Create a copy in (right) stochastic form W = nx.stochastic_graph(D, weight=weight) N = W.number_of_nodes() # Choose fixed starting vector if not given x = dict.fromkeys(W, 1.0 / N) p = dict.fromkeys(W, 1.0 / N) dangling_weights = p dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0] # power iteration: make up to max_iter iterations for _ in range(max_iter): # print('here') xlast = x x = dict.fromkeys(xlast.keys(), 0) danglesum = alpha * sum(xlast[n] for n in dangling_nodes) # print(danglesum) for n in x: # this matrix multiply looks odd because it is # doing a left multiply x^T=xlast^T*W for nbr in W[n]: x[nbr] += alpha * xlast[n] * W[n][nbr][weight] x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n] # check convergence, l1 norm # err = sum([abs(x[n] - xlast[n]) for n in x]) # if err < N*tol: # return x return x raise nx.NetworkXError('pagerank: power iteration failed to converge ' 'in %d iterations.' % max_iter)
def pagerank(G, alpha=0.85, max_iter=100, tol=1e-4, x_start=None, personalization=None): """ Compute and return the PageRank in an directed graph (also see networkx documentation). The output is a dictionary mapping the node-id to its PageRank value. Also, the number of iterations to convergence is returned. """ # some precondition checking. (we could also convert undirected to directed.) if not G.is_directed(): raise Exception("pagerank() only defined for directed graphs.") # to be completely correct we should also remove self-referential nodes, # but let's just ignore this for performancy issues at the moment # and assume the input does not containt self-referential nodes. # G.remove_edges_from(G.selfloop_edges()) nodes = G.nodes(); nb_nodes = len(nodes); if nb_nodes == 0: return {} # value for nodes without backlinks min_value = (1.0-alpha)/nb_nodes # initial pagerank dict if x_start == None: # initialize the PageRank dict with 1/N for all nodes x = dict.fromkeys(nodes, 1.0/nb_nodes) else: x = x_start # normalize starting vector to 1 s = 1.0/sum(x.values()) for k in x: x[k]*=s # assign uniform personalization/teleportation vector if not given """if personalization is None: p = dict.fromkeys(nodes,1.0/nb_nodes) else: p = personalization # normalize starting vector to 1 s = 1.0/sum(p.values()) for k in p: p[k]*=s if set(p)!=set(G): raise Exception('Personalization vector must have a value for every node') """ # "dangling" nodes, no links out from them; fix them out_degree = G.out_degree() for dangling in (n for n in nodes if out_degree[n]==0.0): for n in nodes: G.add_edge(dangling, n) # create a copy in (right) stochastic form which we will use # to avoid recalculating the number of outgoing links every time W=nx.stochastic_graph(G) #W = G # now the iterative algorithm # (which is basically a version of the power method, # without using explicit matrix multiplications) i = 0 while True: # uncomment following 2 lines if you want to view each iteration #print "iteration %d:" % i #print "pagerank:", x i += 1 # after maximum iterations have been reached, stop if i > max_iter: print "no convergence after {0} iterations!".format(max_iter) break # some helper variables diff = 0 #total difference compared to last iteration x_new = dict.fromkeys(nodes, 0) # the dict where we store our new values # now the pagerank calculations for node in nodes: rank = min_value #print "node", node #print "min value", min_value for referring_page in W.predecessors_iter(node): #print "refered by ", referring_page #print "old value", old_pagerank[referring_page] #print "G out degree", G.out_degree(referring_page) rank += alpha * x[referring_page] * W[referring_page][node]['weight'] # or / G.out_degree(referring_page) # the personalization # rank += min_value * p[node] diff += abs(rank - x[node]) #accumulate the difference x_new[node] = rank x = x_new # our new pagerank #print pagerank #stop if converged if diff < tol: #print "converged after {0} iterations".format(i) break #normalize PageRank total = sum(x.values()) if total!=0: s = 1.0/total for n in x: x[n] *= s return x,i
def test_stochastic_graph_input():
    S = nx.stochastic_graph(nx.Graph())