def google_matrix(G, alpha=0.85, personalization=None,
                  nodelist=None, weight='weight', dangling=None):
    """Return the Google matrix of the graph.

    The result is ``alpha * S + (1 - alpha) * p`` where ``S`` is the
    row-normalized adjacency matrix of `G` (rows of dangling nodes are first
    replaced by the dangling distribution) and ``p`` is the normalized
    personalization vector.

    Parameters
    ----------
    G : graph
      A NetworkX graph.  Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float
      The damping factor.

    personalization : dict, optional
      The "personalization vector": a dictionary with a key for every node
      in `nodelist` and a nonzero value for each.  The values are rescaled to
      sum to one.  By default, a uniform distribution is used.

    nodelist : list, optional
      The rows and columns are ordered according to the nodes in nodelist.
      If nodelist is None, the ordering is that of iterating over `G`.

    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.

    dangling : dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes
      without any outedges.  The dict key is the node the outedge points to
      and the dict value is the weight of that outedge.  The values are
      rescaled to sum to one.  By default the (normalized) personalization
      vector is used.  This must be chosen so that the transition matrix is
      irreducible (see Notes).

    Returns
    -------
    A : NumPy matrix
       Google matrix of the graph.  For a graph with no nodes the (empty)
       adjacency matrix is returned unchanged.

    Raises
    ------
    NetworkXError
        If `personalization` or `dangling` is given but lacks an entry for
        some node in `nodelist`.

    Notes
    -----
    The matrix returned is the transition matrix of the Markov chain used in
    PageRank.  For PageRank to converge to a unique stationary distribution
    the transition matrix must be irreducible, i.e. there must be a path
    between every pair of nodes; otherwise "rank sinks" may appear.

    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    The uniform vector is sized with ``len(G)``, so `nodelist` is expected
    to cover every node of `G`.

    See Also
    --------
    pagerank, pagerank_numpy, pagerank_scipy
    """
    import numpy as np

    def _as_distribution(values, error_template):
        # Turn a node-keyed dict into a vector in nodelist order that sums
        # to one, insisting that every node has an entry.
        absent = set(nodelist) - set(values)
        if absent:
            raise NetworkXError(error_template % absent)
        vector = np.array([values[n] for n in nodelist], dtype=float)
        return vector / vector.sum()

    if nodelist is None:
        nodelist = list(G)

    M = nx.to_numpy_matrix(G, nodelist=nodelist, weight=weight)
    N = len(G)
    if N == 0:
        return M

    # Personalization vector
    if personalization is not None:
        p = _as_distribution(personalization,
                             'Personalization vector dictionary '
                             'must have a value for every node. '
                             'Missing nodes %s')
    else:
        p = np.repeat(1.0 / N, N)

    # Dangling nodes
    if dangling is not None:
        dangling_weights = _as_distribution(dangling,
                                            'Dangling node dictionary '
                                            'must have a value for every node. '
                                            'Missing nodes %s')
    else:
        dangling_weights = p

    # Rows that sum to zero belong to nodes with no out links; give each of
    # them the dangling distribution before normalizing.
    out_weight = M.sum(axis=1)
    for row in np.where(out_weight == 0)[0]:
        M[row] = dangling_weights

    M /= M.sum(axis=1)  # Normalize rows to sum to 1

    return alpha * M + (1 - alpha) * p
def hexagonal_lattice_graph(m, n, periodic=False, with_positions=True,
                            create_using=None):
    """Returns an `m` by `n` hexagonal lattice graph.

    The *hexagonal lattice graph* is a graph whose nodes and edges are
    the `hexagonal tiling`_ of the plane.

    The returned graph will have `m` rows and `n` columns of hexagons.
    `Odd numbered columns`_ are shifted up relative to even numbered columns.

    Node positions are computed and stored in the node attribute 'pos' when
    `with_positions` is true (the default).  They give the standard
    embedding in the plane with sidelength 1.
    `pos = nx.get_node_attributes(G, 'pos')` creates a dict ready for drawing.

    .. _hexagonal tiling: https://en.wikipedia.org/wiki/Hexagonal_tiling
    .. _Odd numbered columns: http://www-cs-students.stanford.edu/~amitp/game-programming/grids/

    Parameters
    ----------
    m : int
        The number of rows of hexagons in the lattice.

    n : int
        The number of columns of hexagons in the lattice.

    periodic : bool
        Whether to make a periodic grid by joining the boundary vertices.
        For this to work `n` must be even and both `n > 1` and `m > 1`.
        The periodic connections create another row and column of hexagons
        so these graphs have fewer nodes as boundary nodes are identified.

    with_positions : bool (default: True)
        Store the coordinates of each node in the graph node attribute 'pos'.
        The coordinates provide a lattice with vertical columns of hexagons
        offset to interleave and cover the plane.  The same formula is used
        for periodic and non-periodic lattices.  When false, no 'pos'
        attribute is set.

    create_using : NetworkX graph constructor, optional (default=nx.Graph)
        Graph type to create. If graph instance, then cleared before populated.
        If graph is directed, edges will point up or right.

    Returns
    -------
    NetworkX graph
        The *m* by *n* hexagonal lattice graph.  If `m` or `n` is zero the
        empty graph is returned.

    Raises
    ------
    NetworkXError
        If `periodic` is true and `n` is odd, `n < 2` or `m < 2`.
    """
    G = empty_graph(0, create_using)
    if m == 0 or n == 0:
        return G
    if periodic and (n % 2 == 1 or m < 2 or n < 2):
        msg = "periodic hexagonal lattice needs m > 1, n > 1 and even n"
        raise NetworkXError(msg)

    M = 2 * m    # twice as many nodes as hexagons vertically
    rows = range(M + 2)
    cols = range(n + 1)
    # make lattice
    col_edges = (((i, j), (i, j + 1)) for i in cols for j in rows[:M + 1])
    row_edges = (((i, j), (i + 1, j)) for i in cols[:n] for j in rows
                 if i % 2 == j % 2)
    G.add_edges_from(col_edges)
    G.add_edges_from(row_edges)
    # Remove corner nodes with one edge
    G.remove_node((0, M + 1))
    G.remove_node((n, (M + 1) * (n % 2)))

    # identify boundary nodes if periodic
    if periodic:
        for i in cols[:n]:
            G = contracted_nodes(G, (i, 0), (i, M))
        for i in cols[1:]:
            G = contracted_nodes(G, (i, 1), (i, M + 1))
        for j in rows[1:M]:
            G = contracted_nodes(G, (0, j), (n, j))
        G.remove_node((n, M))

    # calc position in embedded space (only when the caller asked for it)
    if with_positions:
        h = sqrt(3) / 2
        # Lattice points that were removed or contracted away are skipped.
        pos = {(i, j): (0.5 + i + i // 2 + (j % 2) * ((i % 2) - .5), h * j)
               for i in cols for j in rows if (i, j) in G}
        set_node_attributes(G, pos, 'pos')
    return G
def add_edges_from(self, ebunch, attr_dict=None, **attr):
    """Add all the edges in ebunch.

    Parameters
    ----------
    ebunch : container of edges
        Each edge given in the container will be added to the
        graph. The edges must be given as 2-tuples (u,v) or
        3-tuples (u,v,d) where d is a dictionary containing edge data.
    attr_dict : dictionary, optional  (default= no attributes)
        Dictionary of edge attributes.  Key/value pairs will
        update existing data associated with each edge.  The dictionary
        passed in is not modified.
    attr : keyword arguments, optional
        Edge data (or labels or objects) can be assigned using
        keyword arguments.  They take precedence over `attr_dict`.

    Raises
    ------
    NetworkXError
        If `attr_dict` has no ``update`` method, or if an edge is not a
        2-tuple or 3-tuple.
    AssertionError
        If the third item of a 3-tuple has no ``update`` method (this check
        is an ``assert`` and is therefore skipped under ``python -O``).

    See Also
    --------
    add_edge : add a single edge
    add_weighted_edges_from : convenient way to add weighted edges

    Notes
    -----
    Adding the same edge twice has no effect but any edge data
    will be updated when each duplicate edge is added.

    Edge attributes specified in edges as a tuple take precedence
    over attributes specified generally.

    Examples
    --------
    >>> G = nx.Graph()   # or DiGraph, MultiGraph, MultiDiGraph, etc
    >>> G.add_edges_from([(0,1),(1,2)]) # using a list of edge tuples
    >>> e = zip(range(0,3),range(1,4))
    >>> G.add_edges_from(e) # Add the path graph 0-1-2-3

    Associate data to edges

    >>> G.add_edges_from([(1,2),(2,3)], weight=3)
    >>> G.add_edges_from([(3,4),(1,4)], label='WN2898')
    """
    # set up attribute dict
    if attr_dict is None:
        attr_dict = attr
    else:
        if not hasattr(attr_dict, "update"):
            raise NetworkXError(
                "The attr_dict argument must be a dict.")
        # Merge into a fresh dict so the caller's attr_dict is left untouched.
        merged = dict(attr_dict)
        merged.update(attr)
        attr_dict = merged
    # process ebunch
    for e in ebunch:
        ne = len(e)
        if ne == 3:
            u, v, dd = e
            # NOTE: kept as an assert to preserve the exception type callers
            # may already see; it does not run under ``python -O``.
            assert hasattr(dd, "update")
        elif ne == 2:
            u, v = e
            dd = {}
        else:
            raise NetworkXError(
                "Edge tuple %s must be a 2-tuple or 3-tuple." % (e,))
        # Register endpoints that are not yet in the graph.
        if u not in self.succ:
            self.succ[u] = {}
            self.pred[u] = {}
            self.node[u] = {}
        if v not in self.succ:
            self.succ[v] = {}
            self.pred[v] = {}
            self.node[v] = {}
        # Reuse the existing data dict for a duplicate edge; general
        # attributes are applied first so per-edge data wins.
        datadict = self.adj[u].get(v, {})
        datadict.update(attr_dict)
        datadict.update(dd)
        # Both directions share the same data dictionary object.
        self.succ[u][v] = datadict
        self.pred[v][u] = datadict
def remove_edge(self, u, v, key=None):
    """Remove an edge between u and v.

    Parameters
    ----------
    u, v : nodes
        Remove an edge between nodes u and v.
    key : hashable identifier, optional (default=None)
        Used to distinguish multiple edges between a pair of nodes.
        If None remove a single (arbitrary) edge between u and v.

    Raises
    ------
    NetworkXError
        If there is not an edge between u and v, or
        if there is no edge with the specified key.

    See Also
    --------
    remove_edges_from : remove a collection of edges

    Examples
    --------
    >>> G = nx.MultiDiGraph()
    >>> G.add_path([0,1,2,3])
    >>> G.remove_edge(0,1)
    >>> e = (1,2)
    >>> G.remove_edge(*e) # unpacks e from an edge tuple

    For multiple edges

    >>> G = nx.MultiDiGraph()
    >>> G.add_edges_from([(1,2),(1,2),(1,2)])
    >>> G.remove_edge(1,2) # remove a single (arbitrary) edge

    For edges with keys

    >>> G = nx.MultiDiGraph()
    >>> G.add_edge(1,2,key='first')
    >>> G.add_edge(1,2,key='second')
    >>> G.remove_edge(1,2,key='second')
    """
    # Look up the key -> data mapping of the parallel edges u -> v.
    try:
        parallel_edges = self.adj[u][v]
    except KeyError:
        raise NetworkXError(
            "The edge %s-%s is not in the graph." % (u, v))

    if key is not None:
        # Drop exactly the requested parallel edge.
        try:
            del parallel_edges[key]
        except KeyError:
            raise NetworkXError(
                "The edge %s-%s with key %s is not in the graph." % (u, v, key))
    else:
        # No key given: any one of the parallel edges will do.
        parallel_edges.popitem()

    if not parallel_edges:
        # That was the last u -> v edge, so forget the adjacency entries.
        del self.succ[u][v]
        del self.pred[v][u]
def partial_duplication_graph(N, n, p, q, seed=None):
    """Returns a random graph using the partial duplication model.

    Parameters
    ----------
    N : int
        The total number of nodes in the final graph.

    n : int
        The number of nodes in the initial clique.

    p : float
        The probability of joining each neighbor of a node to the
        duplicate node. Must be a number between zero and one,
        inclusive.

    q : float
        The probability of joining the source node to the duplicate
        node. Must be a number between zero and one, inclusive.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.
        The body calls ``seed.randint`` and ``seed.random`` directly, so by
        the time it runs `seed` must be a random-number-generator object
        (presumably supplied by a decorator outside this block -- verify).

    Raises
    ------
    NetworkXError
        If `p` or `q` lies outside ``[0, 1]``, or if ``n > N``.

    Notes
    -----
    A graph of nodes is grown by creating a fully connected graph
    of size `n`. The following procedure is then repeated until
    a total of `N` nodes have been reached.

    1. A random node, *u*, is picked and a new node, *v*, is created.
    2. For each neighbor of *u* an edge from the neighbor to *v* is created
       with probability `p`.
    3. An edge from *u* to *v* is created with probability `q`.

    This algorithm appears in [1].

    This implementation allows the possibility of generating
    disconnected graphs.

    References
    ----------
    .. [1] Knudsen Michael, and Carsten Wiuf. "A Markov chain approach to
           randomly grown graphs." Journal of Applied Mathematics 2008.
           <https://doi.org/10.1155/2008/190836>

    """
    def _not_a_probability(value):
        return value < 0 or value > 1

    if _not_a_probability(p) or _not_a_probability(q):
        msg = "partial duplication graph must have 0 <= p, q <= 1."
        raise NetworkXError(msg)
    if n > N:
        raise NetworkXError("partial duplication graph must have n <= N.")

    G = nx.complete_graph(n)
    for v in range(n, N):
        # Choose the node to duplicate among those already present.
        u = seed.randint(0, v - 1)
        G.add_node(v)

        # Copy each of u's links to v with probability p.  The neighbor
        # list is snapshotted before any edge is added.
        for nbr in list(nx.all_neighbors(G, u)):
            if seed.random() < p:
                G.add_edge(v, nbr)

        # Link the duplicate to its source with probability q.
        if seed.random() < q:
            G.add_edge(v, u)
    return G
def parse_sparse6(string):
    """Read an undirected graph in sparse6 format from string.

    Parameters
    ----------
    string : string
       Data in sparse6 format.  An optional ``>>sparse6<<`` header is
       skipped; the remaining data must start with a colon.

    Returns
    -------
    G : Graph
       A ``Graph`` when no edge occurs more than once in the data,
       otherwise a ``MultiGraph``.

    Raises
    ------
    NetworkXError
        If the string is unable to be parsed in sparse6 format

    Examples
    --------
    >>> G = nx.parse_sparse6(':A_')
    >>> sorted(G.edges())
    [(0, 1), (0, 1), (0, 1)]

    See Also
    --------
    generate_sparse6, read_sparse6, write_sparse6

    References
    ----------
    .. [1] Sparse6 specification
           <http://users.cecs.anu.edu.au/~bdm/data/formats.html>

    """
    if string.startswith('>>sparse6<<'):
        string = string[11:]
    if not string.startswith(':'):
        raise NetworkXError('Expected leading colon in sparse6')
    n, data = data_to_n(graph6_to_data(string[1:]))
    # k is the number of bits used for each x value: the smallest k >= 1
    # with 2**k >= n.
    k = 1
    while 1 << k < n:
        k += 1

    def parseData():
        """Return stream of pairs b[i], x[i] for sparse6 format."""
        chunks = iter(data)
        d = None  # partial data word
        dLen = 0  # how many unparsed bits are left in d

        while 1:
            if dLen < 1:
                # Running out of 6-bit words simply ends the stream.  The
                # StopIteration must be caught here: since PEP 479 letting
                # it escape a generator raises RuntimeError instead.
                try:
                    d = next(chunks)
                except StopIteration:
                    return
                dLen = 6
            dLen -= 1
            b = (d >> dLen) & 1  # grab top remaining bit

            x = d & ((1 << dLen) - 1)  # partially built up value of x
            xLen = dLen  # how many bits included so far in x
            while xLen < k:  # now grab full chunks until we have enough
                try:
                    d = next(chunks)
                except StopIteration:
                    # Incomplete trailing pair: drop it and stop.
                    return
                dLen = 6
                x = (x << 6) + d
                xLen += 6
            x = (x >> (xLen - k))  # shift back the extra bits
            dLen = xLen - k
            yield b, x

    v = 0

    # Start with a multigraph so repeated edges are kept; it is converted
    # to a simple graph at the end if no repeat was seen.
    G = nx.MultiGraph()
    G.add_nodes_from(range(n))

    multigraph = False
    for b, x in parseData():
        if b == 1:
            v += 1
        # padding with ones can cause overlarge number here
        if x >= n or v >= n:
            break
        elif x > v:
            v = x
        else:
            if G.has_edge(x, v):
                multigraph = True
            G.add_edge(x, v)
    if not multigraph:
        G = nx.Graph(G)
    return G
def hits_scipy(G, max_iter=100, tol=1.0e-6, normalized=True):
    """Return HITS hubs and authorities values for nodes.

    The HITS algorithm computes two numbers for a node.
    Authorities estimates the node value based on the incoming links.
    Hubs estimates the node value based on outgoing links.

    Parameters
    ----------
    G : graph
      A NetworkX graph

    max_iter : integer, optional
      Controls when the power method gives up (see Notes).

    tol : float, optional
      Error tolerance used to check convergence in power method iteration.

    normalized : bool (default=True)
       Normalize results by the sum of all of the values.

    Returns
    -------
    (hubs,authorities) : two-tuple of dictionaries
       Two dictionaries keyed by node containing the hub and authority
       values.  Two empty dictionaries are returned for an empty graph.

    Raises
    ------
    ImportError
        If SciPy (or NumPy) cannot be imported.
    NetworkXError
        If the power iteration does not converge.

    Examples
    --------
    >>> G=nx.path_graph(4)
    >>> h,a=nx.hits(G)

    Notes
    -----
    This implementation uses SciPy sparse matrices.

    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence.  The iteration stops as soon as the
    l1 distance between two successive (max-scaled) iterates drops below
    `tol`.  If that has not happened after ``max_iter + 2`` iterations a
    NetworkXError is raised.

    The HITS algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Jon Kleinberg,
       Authoritative sources in a hyperlinked environment
       Journal of the ACM 46 (5): 604-632, 1999.
       doi:10.1145/324133.324140.
       http://www.cs.cornell.edu/home/kleinber/auth.pdf.
    """
    try:
        import scipy.sparse
        import numpy as np
    except ImportError:
        raise ImportError(
            "hits_scipy() requires SciPy: http://scipy.org/")
    if len(G) == 0:
        return {}, {}
    M = nx.to_scipy_sparse_matrix(G, nodelist=G.nodes())
    n = M.shape[0]  # M should be square
    A = M.T * M  # authority matrix
    # NumPy is used directly: the ``scipy.ones`` / ``scipy.absolute``
    # aliases of the NumPy functions are deprecated in SciPy.
    x = np.ones((n, 1)) / n  # initial guess
    # power iteration on authority matrix
    i = 0
    while True:
        xlast = x
        x = A * x
        x = x / x.max()
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < tol:
            break
        if i > max_iter:
            raise NetworkXError(
                "HITS: power iteration failed to converge in %d iterations." % (i + 1))
        i += 1

    a = np.asarray(x).flatten()
    # h=M*a
    h = np.asarray(M * a).flatten()
    if normalized:
        h = h / h.sum()
        a = a / a.sum()
    hubs = dict(zip(G.nodes(), map(float, h)))
    authorities = dict(zip(G.nodes(), map(float, a)))
    return hubs, authorities
def asyn_fluidc(G, k, max_iter=100, seed=None):
    """Returns communities in `G` as detected by Fluid Communities algorithm.

    The asynchronous fluid communities algorithm is described in
    [1]_. The algorithm is based on the simple idea of fluids interacting
    in an environment, expanding and pushing each other. Its initialization
    is random, so found communities may vary on different executions.

    The algorithm proceeds as follows. First each of the initial k communities
    is initialized in a random vertex in the graph. Then the algorithm iterates
    over all vertices in a random order, updating the community of each vertex
    based on its own community and the communities of its neighbours. This
    process is performed several times until convergence.
    At all times, each community has a total density of 1, which is equally
    distributed among the vertices it contains. If a vertex changes of
    community, vertex densities of affected communities are adjusted
    immediately. When a complete iteration over all vertices is done, such that
    no vertex changes the community it belongs to, the algorithm has converged
    and returns.

    This is the original version of the algorithm described in [1]_.
    Unfortunately, it does not support weighted graphs yet.

    Parameters
    ----------
    G : Graph

    k : integer
        The number of communities to be found.

    max_iter : integer
        The number of maximum iterations allowed. By default 100.  The sweep
        loop is left once the sweep counter exceeds this value.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.
        The body calls ``seed.shuffle`` and ``seed.choice`` directly, so it
        must be a random-number-generator object when the body runs
        (presumably converted by a decorator outside this block -- verify).

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Raises
    ------
    NetworkXError
        If `k` is not a positive integer, if `G` is not connected, or if
        `k` exceeds the number of nodes.

    Notes
    -----
    k variable is not an optional argument.

    References
    ----------
    .. [1] Parés F., Garcia-Gasulla D. et al. "Fluid Communities: A
       Competitive and Highly Scalable Community Detection Algorithm".
       [https://arxiv.org/pdf/1703.09307.pdf].
    """
    # Initial checks
    if not isinstance(k, int):
        raise NetworkXError("k must be an integer.")
    if k <= 0:
        raise NetworkXError("k must be greater than 0.")
    if not is_connected(G):
        raise NetworkXError("Fluid Communities require connected Graphs.")
    if len(G) < k:
        raise NetworkXError("k cannot be bigger than the number of nodes.")

    # Initialization: the first k vertices of a random ordering each seed
    # one community, which starts with one vertex and full density.
    max_density = 1.0
    seeds = list(G)
    seed.shuffle(seeds)
    communities = dict(zip(seeds[:k], range(k)))
    com_to_numvertices = {com: 1 for com in communities.values()}
    density = {com: max_density for com in communities.values()}

    # Sweep over the vertices until a full sweep changes nothing.
    iter_count = 0
    changed = True
    while changed:
        changed = False
        iter_count += 1
        # Loop over all vertices in graph in a random order
        order = list(G)
        seed.shuffle(order)
        for vertex in order:
            # Sum, per community, the densities found on the vertex itself
            # and on its neighbours.  Unassigned vertices contribute nothing.
            com_counter = Counter()
            if vertex in communities:
                own = communities[vertex]
                com_counter[own] += density[own]
            for nbr in G[vertex]:
                if nbr in communities:
                    nbr_com = communities[nbr]
                    com_counter[nbr_com] += density[nbr_com]

            # No community in reach yet: nothing to decide for this vertex.
            if not com_counter:
                continue

            # Communities whose summed density ties with the maximum.
            max_freq = max(com_counter.values())
            best_communities = [com for com, freq in com_counter.items()
                                if (max_freq - freq) < 0.0001]

            # The vertex keeps its community when it is among the best.
            if (vertex in communities
                    and communities[vertex] in best_communities):
                continue

            # The vertex changes community, so we have not converged.
            changed = True
            # Randomly chose a new community from candidates
            new_com = seed.choice(best_communities)

            # The community being left (if any) loses a vertex.
            if vertex in communities:
                old_com = communities[vertex]
                com_to_numvertices[old_com] -= 1
                density[old_com] = max_density / com_to_numvertices[old_com]

            # The community being joined gains a vertex.
            communities[vertex] = new_com
            com_to_numvertices[new_com] += 1
            density[new_com] = max_density / com_to_numvertices[new_com]

        # If maximum iterations reached --> output actual results
        if iter_count > max_iter:
            break
    # Return results by grouping communities as list of vertices
    return iter(groups(communities).values())
def from_graph6_bytes(string):
    """Read a simple undirected graph in graph6 format from string.

    Parameters
    ----------
    string : string
       Data in graph6 format, without a trailing newline.  An optional
       ``>>graph6<<`` header is skipped.

    Returns
    -------
    G : Graph

    Raises
    ------
    NetworkXError
        If the string is unable to be parsed in graph6 format

    ValueError
        If any character ``c`` in the input string does not satisfy
        ``63 <= ord(c) < 127``.

    Examples
    --------
    >>> G = nx.from_graph6_bytes(b'A_')
    >>> sorted(G.edges())
    [(0, 1)]

    See Also
    --------
    read_graph6, write_graph6

    References
    ----------
    .. [1] Graph6 specification
           <http://users.cecs.anu.edu.au/~bdm/data/formats.html>

    """
    def bits():
        """Returns sequence of individual bits from 6-bit-per-value
        list of data values."""
        for d in data:
            for i in [5, 4, 3, 2, 1, 0]:
                yield (d >> i) & 1

    if string.startswith(b'>>graph6<<'):
        string = string[10:]

    data = [c - 63 for c in string]
    # Reject both ends of the range: a byte below 63 (e.g. a stray newline)
    # would otherwise become a negative value and be decoded as garbage.
    if any(c < 0 or c > 63 for c in data):
        raise ValueError('each input character must be in range(63, 127)')

    n, data = data_to_n(data)
    # Number of 6-bit words needed for the n*(n-1)/2 upper-triangle bits.
    nd = (n * (n - 1) // 2 + 5) // 6
    if len(data) != nd:
        raise NetworkXError(
            'Expected %d bits but got %d in graph6' %
            (n * (n - 1) // 2, len(data) * 6))

    G = nx.Graph()
    G.add_nodes_from(range(n))
    # Bits are stored column by column: (0,1), (0,2), (1,2), (0,3), ...
    pairs = ((i, j) for j in range(1, n) for i in range(j))
    for (i, j), b in zip(pairs, bits()):
        if b:
            G.add_edge(i, j)

    return G
def onion_layers(G):
    """Returns the layer of each vertex in the onion decomposition of the graph.

    The onion decomposition refines the k-core decomposition by providing
    information on the internal organization of each k-shell. It is usually
    used alongside the `core numbers`.

    Parameters
    ----------
    G : NetworkX graph
        A simple graph without self loops or parallel edges

    Returns
    -------
    od_layers : dictionary
        A dictionary keyed by vertex to the onion layer. The layers are
        contiguous integers starting at 1.

    Raises
    ------
    NetworkXError
        The onion decomposition is not implemented for graphs with self loops
        or parallel edges or for directed graphs.  (Only the self-loop check
        is performed inside this function body.)

    Notes
    -----
    Not implemented for graphs with parallel edges or self loops.

    Not implemented for directed graphs.

    See Also
    --------
    core_number

    References
    ----------
    .. [1] Multi-scale structure and topological anomaly detection via a new
       network statistic: The onion decomposition
       L. Hébert-Dufresne, J. A. Grochow, and A. Allard
       Scientific Reports 6, 31708 (2016)
       http://doi.org/10.1038/srep31708
    .. [2] Percolation and the effective structure of complex networks
       A. Allard and L. Hébert-Dufresne
       Physical Review X 9, 011023 (2019)
       http://doi.org/10.1103/PhysRevX.9.011023
    """
    if nx.number_of_selfloops(G) > 0:
        msg = ('Input graph contains self loops which is not permitted; '
               'Consider using G.remove_edges_from(nx.selfloop_edges(G)).')
        raise NetworkXError(msg)

    layer_of = {}
    # Working copies that shrink as vertices are peeled off.
    adjacency = {v: list(nx.all_neighbors(G, v)) for v in G}
    remaining = dict(G.degree())  # vertex -> degree among unpeeled vertices

    core = 1
    layer = 1

    # Vertices of degree 0, if any, make up layer 1 on their own.
    isolated = list(nx.isolates(G))
    if isolated:
        for v in isolated:
            layer_of[v] = layer
            del remaining[v]
        layer = 2

    while remaining:
        ordered = sorted(remaining, key=remaining.get)

        # The core only grows once no vertex of lower degree is left.
        core = max(core, remaining[ordered[0]])

        # `ordered` is sorted by degree, so these form a prefix of it.
        peel = [v for v in ordered if remaining[v] <= core]

        # Remove the whole layer, lowering the degree of what stays behind.
        for v in peel:
            layer_of[v] = layer
            for u in adjacency[v]:
                adjacency[u].remove(v)
                remaining[u] -= 1
            del remaining[v]

        layer += 1

    return layer_of
def core_number(G):
    """Returns the core number for each vertex.

    A k-core is a maximal subgraph that contains nodes of degree k or more.

    The core number of a node is the largest value k of a k-core containing
    that node.

    Parameters
    ----------
    G : NetworkX graph
       A graph or directed graph

    Returns
    -------
    core_number : dictionary
       A dictionary keyed by node to the core number.

    Raises
    ------
    NetworkXError
        The k-core is not implemented for graphs with self loops
        or parallel edges.  (Only the self-loop check is performed inside
        this function body.)

    Notes
    -----
    Not implemented for graphs with parallel edges or self loops.

    For directed graphs the node degree is defined to be the
    in-degree + out-degree.

    References
    ----------
    .. [1] An O(m) Algorithm for Cores Decomposition of Networks
       Vladimir Batagelj and Matjaz Zaversnik, 2003.
       https://arxiv.org/abs/cs.DS/0310049
    """
    if nx.number_of_selfloops(G) > 0:
        msg = ('Input graph has self loops which is not permitted; '
               'Consider using G.remove_edges_from(nx.selfloop_edges(G)).')
        raise NetworkXError(msg)

    # Start from the degree of each node and lower it in place.
    core = dict(G.degree())
    # Nodes in increasing order of (current) core estimate.
    order = sorted(core, key=core.get)

    # bin_start[d] is the index in `order` of the first node with value d.
    bin_start = [0]
    highest_seen = 0
    for index, node in enumerate(order):
        gap = core[node] - highest_seen
        if gap > 0:
            bin_start += [index] * gap
            highest_seen = core[node]

    position = {node: index for index, node in enumerate(order)}
    adjacency = {node: list(nx.all_neighbors(G, node)) for node in G}

    # `order` is rearranged while it is being traversed; that is intended.
    for v in order:
        for u in adjacency[v]:
            if core[u] <= core[v]:
                continue
            adjacency[u].remove(v)
            # Swap u with the first node of its bin, then shrink that bin
            # from the left so u lands in the bin one below.
            u_pos = position[u]
            head = bin_start[core[u]]
            head_node = order[head]
            position[u] = head
            position[head_node] = u_pos
            order[head], order[u_pos] = order[u_pos], head_node
            bin_start[core[u]] += 1
            core[u] -= 1
    return core
def parse_gml(lines, relabel=True):
    """Parse GML graph from a string or iterable.

    Parameters
    ----------
    lines : string or iterable
       Data in GML format.  The pieces are joined with the empty string
       before parsing.

    relabel : bool, optional
       If True use the GML node label attribute for node names otherwise use
       the node id.

    Returns
    -------
    G : Graph, DiGraph, MultiGraph or MultiDiGraph
       A multigraph type is returned only if parallel edges were found.

    Raises
    ------
    ImportError
        If the pyparsing module is not available.
    pyparsing.ParseException
        If the data cannot be parsed; the offending line and column are
        printed before the exception is re-raised.
    NetworkXError
        If `relabel` is True and two nodes share the same label.

    See Also
    --------
    write_gml, read_gml

    Notes
    -----
    This stores nested GML attributes as dictionaries in the
    NetworkX graph, node, and edge attribute structures.

    With `relabel` set, every node is expected to carry a ``label``
    attribute.

    Requires pyparsing: http://pyparsing.wikispaces.com/

    References
    ----------
    GML specification:
    http://www.infosun.fim.uni-passau.de/Graphlet/GML/gml-tr.html
    """
    try:
        from pyparsing import ParseException
    except ImportError:
        try:
            from matplotlib.pyparsing import ParseException
        except ImportError:
            # Only a missing module means "pyparsing unavailable"; any other
            # failure is allowed to surface as itself.
            raise ImportError('Import Error: not able to import pyparsing:',
                              'http://pyparsing.wikispaces.com/')
    try:
        data = "".join(lines)
        gml = pyparse_gml()
        tokens = gml.parseString(data)
    except ParseException as err:
        # Show where parsing stopped, then let the caller handle the error.
        print(err.line)
        print(" " * (err.column - 1) + "^")
        print(err)
        raise

    # function to recursively make dicts of key/value pairs
    def wrap(tok):
        listtype = type(tok)
        result = {}
        for k, v in tok:
            if type(v) == listtype:
                result[str(k)] = wrap(v)
            else:
                result[str(k)] = v
        return result

    # Set flag
    multigraph = False
    # but assume multigraphs to start
    if tokens.directed == 1:
        G = nx.MultiDiGraph()
    else:
        G = nx.MultiGraph()
    for k, v in tokens.asList():
        if k == "node":
            vdict = wrap(v)
            node = vdict['id']
            G.add_node(node, attr_dict=vdict)
        elif k == "edge":
            vdict = wrap(v)
            source = vdict.pop('source')
            target = vdict.pop('target')
            if G.has_edge(source, target):
                multigraph = True
            G.add_edge(source, target, attr_dict=vdict)
        else:
            G.graph[k] = v
    # switch to Graph or DiGraph if no parallel edges were found.
    if not multigraph:
        if G.is_directed():
            G = nx.DiGraph(G)
        else:
            G = nx.Graph(G)
    if relabel:
        # relabel, but check for duplicate labels first
        mapping = [(n, d['label']) for n, d in G.node.items()]
        # Collect the labels without unpacking zip(*mapping), which fails
        # when the graph has no nodes.
        labels = [label for _, label in mapping]
        if len(set(labels)) != len(G):
            raise NetworkXError('Failed to relabel nodes: '
                                'duplicate node labels found. '
                                'Use relabel=False.')
        G = nx.relabel_nodes(G, dict(mapping))
    return G
def pagerank_scipy(G, alpha=0.85, personalization=None,
                   max_iter=100, tol=1.0e-6, weight='weight',
                   dangling=None):
    """Return the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph.  Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Damping parameter for PageRank, default=0.85.

    personalization : dict, optional
       The "personalization vector" consisting of a dictionary with a
       key for every graph node and nonzero personalization value for each
       node. By default, a uniform distribution is used.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.
      Iteration stops once the l1 change between successive iterates is
      below ``len(G) * tol``.

    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.

    dangling : dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified).  This must be selected to result in an irreducible
      transition matrix (see notes under google_matrix). It may be common to
      have the dangling dict to be the same as the personalization dict.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value.  An empty dictionary is
       returned for a graph with no nodes.

    Raises
    ------
    NetworkXError
        If `personalization` or `dangling` is given but lacks an entry for
        some node, or if the power iteration does not converge within
        `max_iter` iterations.

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank_scipy(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation uses power iteration with a SciPy
    sparse matrix representation.

    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    See Also
    --------
    pagerank, pagerank_numpy, google_matrix

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    # NumPy is used directly for array helpers: the ``scipy.array`` /
    # ``scipy.repeat`` / ``scipy.where`` / ``scipy.absolute`` aliases of the
    # NumPy functions are deprecated in SciPy.
    import numpy as np
    import scipy.sparse

    N = len(G)
    if N == 0:
        return {}

    nodelist = list(G)
    M = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight,
                                  dtype=float)
    # Row-normalize M; rows that sum to zero (dangling nodes) stay zero.
    S = np.array(M.sum(axis=1)).flatten()
    S[S != 0] = 1.0 / S[S != 0]
    Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
    M = Q * M

    # initial vector
    x = np.repeat(1.0 / N, N)

    # Personalization vector
    if personalization is None:
        p = np.repeat(1.0 / N, N)
    else:
        missing = set(nodelist) - set(personalization)
        if missing:
            raise NetworkXError('Personalization vector dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        p = np.array([personalization[n] for n in nodelist],
                     dtype=float)
        p = p / p.sum()

    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        missing = set(nodelist) - set(dangling)
        if missing:
            raise NetworkXError('Dangling node dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = np.array([dangling[n] for n in nodelist],
                                    dtype=float)
        dangling_weights /= dangling_weights.sum()
    is_dangling = np.where(S == 0)[0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        # Rank held by dangling nodes is redistributed via dangling_weights.
        x = alpha * (x * M + sum(x[is_dangling]) * dangling_weights) + \
            (1 - alpha) * p
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < N * tol:
            return dict(zip(nodelist, map(float, x)))
    raise NetworkXError('pagerank_scipy: power iteration failed to converge '
                        'in %d iterations.' % max_iter)
def pagerank(G, alpha=0.85, personalization=None,
             max_iter=100, tol=1.0e-6, nstart=None, weight='weight',
             dangling=None):
    """Compute PageRank for every node of `G` with a pure-Python power method.

    PageRank ranks the nodes of a graph from the structure of their
    incoming links; it was originally designed to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph.  An undirected graph is first converted with
      ``G.to_directed()``, i.e. every undirected edge becomes two directed
      edges.

    alpha : float, optional
      Damping parameter for PageRank, default=0.85.

    personalization : dict, optional
      The "personalization vector": a dictionary with a key for every
      graph node and a nonzero personalization value for each node.
      The values are normalized to sum to 1.  By default a uniform
      distribution is used.

    max_iter : integer, optional
      Maximum number of power-method iterations.

    tol : float, optional
      Error tolerance used to check convergence of the power method.

    nstart : dictionary, optional
      Starting value of the PageRank iteration for each node.  The values
      are normalized to sum to 1.

    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.

    dangling : dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes
      without any outedges.  The dict key is the node the outedge points
      to and the dict value is the weight of that outedge.  By default,
      dangling nodes are given outedges according to the personalization
      vector (uniform if not specified).  This must be selected to result
      in an irreducible transition matrix (see notes under google_matrix).
      It may be common to have the dangling dict be the same as the
      personalization dict.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value.

    Raises
    ------
    NetworkXError
       If `personalization` or `dangling` is missing a node of `G`, or if
       the iteration has not converged after `max_iter` iterations.

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank(G, alpha=0.9)

    Notes
    -----
    The eigenvector is computed by power iteration, which has no
    guarantee of convergence.  The iteration stops after `max_iter`
    iterations or as soon as the l1 change between two iterates is below
    ``number_of_nodes(G) * tol``.

    The PageRank algorithm was designed for directed graphs; this
    function does not reject undirected input and instead runs on the
    directed version of the graph.

    See Also
    --------
    pagerank_numpy, pagerank_scipy, google_matrix

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    if len(G) == 0:
        return {}

    D = G if G.is_directed() else G.to_directed()

    # Copy of the graph in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    # Starting vector: uniform unless the caller supplied one
    if nstart is None:
        x = {node: 1.0 / N for node in W}
    else:
        start_total = float(sum(nstart.values()))
        x = {node: value / start_total for node, value in nstart.items()}

    # Personalization vector: uniform unless the caller supplied one
    if personalization is None:
        p = {node: 1.0 / N for node in W}
    else:
        absent = set(G) - set(personalization)
        if absent:
            raise NetworkXError('Personalization dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % absent)
        pers_total = float(sum(personalization.values()))
        p = {node: value / pers_total
             for node, value in personalization.items()}

    # Dangling distribution: falls back to the personalization vector
    if dangling is None:
        dangling_weights = p
    else:
        absent = set(G) - set(dangling)
        if absent:
            raise NetworkXError('Dangling node dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % absent)
        dangling_total = float(sum(dangling.values()))
        dangling_weights = {node: value / dangling_total
                            for node, value in dangling.items()}
    dangling_nodes = [node for node in W
                      if W.out_degree(node, weight=weight) == 0.0]

    # power iteration: make up to max_iter iterations
    threshold = N * tol
    for _ in range(max_iter):
        previous = x
        x = {node: 0 for node in previous}
        dangle_mass = alpha * sum(previous[node] for node in dangling_nodes)
        for node in x:
            # Left multiply: x^T = previous^T * W, pushed along out-edges
            out_edges = W[node]
            for nbr in out_edges:
                x[nbr] += alpha * previous[node] * out_edges[nbr][weight]
            x[node] += (dangle_mass * dangling_weights[node]
                        + (1.0 - alpha) * p[node])
        # check convergence, l1 norm
        err = sum(abs(x[node] - previous[node]) for node in x)
        if err < threshold:
            return x
    raise NetworkXError('pagerank: power iteration failed to converge '
                        'in %d iterations.' % max_iter)
def unexpected(curr_token, expected):
    """Raise a NetworkXError reporting a token that does not fit the grammar.

    `curr_token` is unpacked as ``(category, value, lineno, pos)``; a
    `value` of None is reported as ``EOF``.  `expected` is the text
    describing what should have appeared instead.
    """
    _category, found, line_number, column = curr_token
    found = "EOF" if found is None else repr(found)
    message = f"expected {expected}, found {found} at ({line_number}, {column})"
    raise NetworkXError(message)
def parse_gml_lines(lines, label, destringizer):
    """Parse GML `lines` into a graph.

    Parameters
    ----------
    lines : iterable of strings
        The GML source, one string per line.

    label : string or None
        Name of the node attribute used to rename the parsed nodes.  If
        None or ``'id'``, the nodes keep their GML ids.

    destringizer : callable or None
        If given, it is applied to every string value; a `ValueError`
        raised by it leaves the string unchanged.

    Returns
    -------
    G : NetworkX graph
        A (Multi)(Di)Graph, depending on the ``directed`` and
        ``multigraph`` keys of the parsed graph.

    Raises
    ------
    NetworkXError
        On any tokenizing or structural error in the input: no or several
        graphs, missing ``id``/label/``source``/``target`` attributes,
        duplicated node ids, node labels or edges, or edges that refer to
        undefined nodes.
    """
    def tokenize():
        # One capturing group per pattern; the index of the group that
        # matched is used as the token category.
        patterns = [
            r'[A-Za-z][0-9A-Za-z_]*\b',  # keys
            r'[+-]?(?:[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)(?:[Ee][+-]?[0-9]+)?',  # reals
            r'[+-]?[0-9]+',  # ints
            r'".*?"',  # strings
            r'\[',  # dict start
            r'\]',  # dict end
            r'#.*$|\s+'  # comments and whitespaces
            ]
        tokens = re.compile(
            '|'.join('(' + pattern + ')' for pattern in patterns))
        lineno = 0
        for line in lines:
            length = len(line)
            pos = 0
            while pos < length:
                match = tokens.match(line, pos)
                if match is not None:
                    for i in range(len(patterns)):
                        group = match.group(i + 1)
                        if group is not None:
                            if i == 0:  # keys
                                value = group.rstrip()
                            elif i == 1:  # reals
                                value = float(group)
                            elif i == 2:  # ints
                                value = int(group)
                            else:
                                value = group
                            if i != 6:  # comments and whitespaces
                                # Positions are reported 1-based.
                                yield (i, value, lineno + 1, pos + 1)
                            pos += len(group)
                            break
                else:
                    raise NetworkXError('cannot tokenize %r at (%d, %d)' %
                                        (line[pos:], lineno + 1, pos + 1))
            lineno += 1
        yield (None, None, lineno + 1, 1)  # EOF

    def unexpected(curr_token, expected):
        category, value, lineno, pos = curr_token
        raise NetworkXError(
            'expected %s, found %s at (%d, %d)' %
            (expected, repr(value) if value is not None else 'EOF', lineno,
             pos))

    def consume(curr_token, category, expected):
        if curr_token[0] == category:
            return next(tokens)
        unexpected(curr_token, expected)

    def parse_kv(curr_token):
        # Repeated keys accumulate their values in a list.
        dct = defaultdict(list)
        while curr_token[0] == 0:  # keys
            key = curr_token[1]
            curr_token = next(tokens)
            category = curr_token[0]
            if category == 1 or category == 2:  # reals or ints
                value = curr_token[1]
                curr_token = next(tokens)
            elif category == 3:  # strings
                value = unescape(curr_token[1][1:-1])
                if destringizer:
                    try:
                        value = destringizer(value)
                    except ValueError:
                        pass
                curr_token = next(tokens)
            elif category == 4:  # dict start
                curr_token, value = parse_dict(curr_token)
            else:
                unexpected(curr_token, "an int, float, string or '['")
            dct[key].append(value)
        # A key that occurred exactly once maps to its bare value.
        dct = {key: (value if not isinstance(value, list) or
                     len(value) != 1 else value[0])
               for key, value in dct.items()}
        return curr_token, dct

    def parse_dict(curr_token):
        curr_token = consume(curr_token, 4, "'['")  # dict start
        curr_token, dct = parse_kv(curr_token)
        curr_token = consume(curr_token, 5, "']'")  # dict end
        return curr_token, dct

    def parse_graph():
        curr_token, dct = parse_kv(next(tokens))
        if curr_token[0] is not None:  # EOF
            unexpected(curr_token, 'EOF')
        if 'graph' not in dct:
            raise NetworkXError('input contains no graph')
        graph = dct['graph']
        if isinstance(graph, list):
            raise NetworkXError('input contains more than one graph')
        return graph

    tokens = tokenize()
    graph = parse_graph()

    directed = graph.pop('directed', False)
    multigraph = graph.pop('multigraph', False)
    if not multigraph:
        G = nx.DiGraph() if directed else nx.Graph()
    else:
        G = nx.MultiDiGraph() if directed else nx.MultiGraph()
    G.graph.update((key, value) for key, value in graph.items()
                   if key != 'node' and key != 'edge')

    def pop_attr(dct, category, attr, i):
        try:
            return dct.pop(attr)
        except KeyError:
            raise NetworkXError(
                "%s #%d has no '%s' attribute" % (category, i, attr))

    # Relabeling is requested unless `label` is None or names the id itself.
    relabel = label is not None and label != 'id'

    nodes = graph.get('node', [])
    mapping = {}
    node_labels = set()
    for i, node in enumerate(nodes if isinstance(nodes, list) else [nodes]):
        id = pop_attr(node, 'node', 'id', i)
        if id in G:
            raise NetworkXError('node id %r is duplicated' % (id,))
        if relabel:
            # Keep the per-node label in its own variable: rebinding the
            # `label` parameter here would change which attribute is read
            # for later nodes and whether the final relabeling happens.
            node_label = pop_attr(node, 'node', label, i)
            if node_label in node_labels:
                raise NetworkXError('node label %r is duplicated' %
                                    (node_label,))
            node_labels.add(node_label)
            mapping[id] = node_label
        G.add_node(id, **node)

    edges = graph.get('edge', [])
    for i, edge in enumerate(edges if isinstance(edges, list) else [edges]):
        source = pop_attr(edge, 'edge', 'source', i)
        target = pop_attr(edge, 'edge', 'target', i)
        if source not in G:
            raise NetworkXError(
                'edge #%d has an undefined source %r' % (i, source))
        if target not in G:
            raise NetworkXError(
                'edge #%d has an undefined target %r' % (i, target))
        if not multigraph:
            if not G.has_edge(source, target):
                G.add_edge(source, target, **edge)
            else:
                raise nx.NetworkXError(
                    'edge #%d (%r%s%r) is duplicated\n'
                    'Hint: If this is a multigraph, add "multigraph 1" '
                    'to the header of the file.'
                    % (i, source, '->' if directed else '--', target))
        else:
            key = edge.pop('key', None)
            if key is not None and G.has_edge(source, target, key):
                raise nx.NetworkXError(
                    'edge #%d (%r%s%r, %r) is duplicated' %
                    (i, source, '->' if directed else '--', target, key))
            G.add_edge(source, target, key, **edge)

    if relabel:
        G = nx.relabel_nodes(G, mapping)
    return G
def pop_attr(dct, category, attr, i):
    """Remove `attr` from `dct` and return its value.

    If `attr` is absent, a NetworkXError naming the `category` and the
    index `i` of the offending item is raised, chained to the KeyError.
    """
    try:
        popped = dct.pop(attr)
    except KeyError as err:
        message = f"{category} #{i} has no {attr!r} attribute"
        raise NetworkXError(message) from err
    return popped
def unexpected(curr_token, expected):
    """Raise a NetworkXError for a token that does not fit the grammar.

    `curr_token` is unpacked as ``(category, value, lineno, pos)``; a
    `value` of None is reported as ``EOF``.
    """
    _category, found, line_number, column = curr_token
    if found is None:
        shown = 'EOF'
    else:
        shown = repr(found)
    details = (expected, shown, line_number, column)
    raise NetworkXError('expected %s, found %s at (%d, %d)' % details)
def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True):
    """Return HITS hubs and authorities values for nodes.

    The HITS algorithm computes two numbers for a node.
    Authorities estimates the node value based on the incoming links.
    Hubs estimates the node value based on outgoing links.

    Parameters
    ----------
    G : graph
      A NetworkX graph

    max_iter : integer, optional
      Maximum number of iterations in power method.

    tol : float, optional
      Error tolerance used to check convergence in power method iteration.

    nstart : dictionary, optional
      Starting value of each node for power method iteration.  The
      dictionary is copied before being normalized; it is not modified.

    normalized : bool (default=True)
       Normalize results by the sum of all of the values.

    Returns
    -------
    (hubs,authorities) : two-tuple of dictionaries
       Two dictionaries keyed by node containing the hub and authority
       values.

    Raises
    ------
    Exception
       If `G` is a multigraph.
    NetworkXError
       If the iteration has not converged after `max_iter` iterations.

    Examples
    --------
    >>> G=nx.path_graph(4)
    >>> h,a=nx.hits(G)

    Notes
    -----
    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence.  The iteration will stop
    after max_iter iterations or once the l1 change of the hub vector
    between two iterations is below `tol`.

    The HITS algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Jon Kleinberg,
       Authoritative sources in a hyperlinked environment
       Journal of the ACM 46 (5): 604-32, 1999.
       doi:10.1145/324133.324140.
       http://www.cs.cornell.edu/home/kleinber/auth.pdf.
    """
    # is_multigraph() also rejects subclasses of the multigraph classes,
    # which an exact type() comparison would let through.
    if G.is_multigraph():
        raise Exception("hits() not defined for graphs with multiedges.")
    if len(G) == 0:
        return {}, {}
    # choose fixed starting vector if not given
    if nstart is None:
        h = dict.fromkeys(G, 1.0 / G.number_of_nodes())
    else:
        # Work on a copy so the caller's dictionary is left untouched.
        h = dict(nstart)
        # normalize starting vector
        s = 1.0 / sum(h.values())
        for k in h:
            h[k] *= s
    for _ in range(max_iter):  # power iteration: at most max_iter rounds
        hlast = h
        h = dict.fromkeys(hlast, 0)
        a = dict.fromkeys(hlast, 0)
        # this "matrix multiply" looks odd because it is
        # doing a left multiply a^T=hlast^T*G
        for n in h:
            for nbr in G[n]:
                a[nbr] += hlast[n] * G[n][nbr].get('weight', 1)
        # now multiply h=Ga
        for n in h:
            for nbr in G[n]:
                h[n] += a[nbr] * G[n][nbr].get('weight', 1)
        # normalize both vectors by their largest entry
        s = 1.0 / max(h.values())
        for n in h:
            h[n] *= s
        s = 1.0 / max(a.values())
        for n in a:
            a[n] *= s
        # check convergence, l1 norm
        err = sum(abs(h[n] - hlast[n]) for n in h)
        if err < tol:
            break
    else:
        raise NetworkXError(
            "HITS: power iteration failed to converge in %d iterations."
            % max_iter)
    if normalized:
        s = 1.0 / sum(a.values())
        for n in a:
            a[n] *= s
        s = 1.0 / sum(h.values())
        for n in h:
            h[n] *= s
    return h, a
def pop_attr(dct, category, attr, i):
    """Remove `attr` from `dct` and return its value.

    If `attr` is absent, a NetworkXError naming the `category` and the
    index `i` of the offending item is raised.
    """
    try:
        popped = dct.pop(attr)
    except KeyError:
        details = (category, i, attr)
        raise NetworkXError("%s #%d has no '%s' attribute" % details)
    return popped
def add_edge(self, u, v, key=None, attr_dict=None, **attr):
    """Add an edge between u and v.

    The nodes u and v will be automatically added if they are
    not already in the graph.

    Edge attributes can be specified with keywords or by providing
    a dictionary with key/value pairs.  See examples below.

    Parameters
    ----------
    u, v : nodes
        Nodes can be, for example, strings or numbers.
        Nodes must be hashable (and not None) Python objects.
    key : hashable identifier, optional (default=lowest unused integer)
        Used to distinguish multiedges between a pair of nodes.
    attr_dict : dictionary, optional (default= no attributes)
        Dictionary of edge attributes.  Key/value pairs will
        update existing data associated with the edge.  The dictionary
        itself is not modified.
    attr : keyword arguments, optional
        Edge data (or labels or objects) can be assigned using
        keyword arguments.  Keywords override entries of `attr_dict`.

    Raises
    ------
    NetworkXError
        If `attr_dict` is not a dictionary.

    See Also
    --------
    add_edges_from : add a collection of edges

    Notes
    -----
    To replace/update edge data, use the optional key argument
    to identify a unique edge.  Otherwise a new edge will be created.

    NetworkX algorithms designed for weighted graphs cannot use
    multigraphs directly because it is not clear how to handle
    multiedge weights.  Convert to Graph using edge attribute
    'weight' to enable weighted graph algorithms.

    Examples
    --------
    The following all add the edge e=(1,2) to graph G:

    >>> G = nx.MultiDiGraph()
    >>> e = (1,2)
    >>> G.add_edge(1, 2)           # explicit two-node form
    >>> G.add_edge(*e)             # single edge as tuple of two nodes
    >>> G.add_edges_from( [(1,2)] ) # add edges from iterable container

    Associate data to edges using keywords:

    >>> G.add_edge(1, 2, weight=3)
    >>> G.add_edge(1, 2, key=0, weight=4)   # update data for key=0
    >>> G.add_edge(1, 3, weight=7, capacity=15, length=342.7)
    """
    # set up attribute dict
    if attr_dict is None:
        attr_dict = attr
    else:
        # Merge into a copy so the caller's dictionary is not mutated.
        try:
            merged = attr_dict.copy()
            merged.update(attr)
        except AttributeError:
            raise NetworkXError(
                "The attr_dict argument must be a dictionary.")
        attr_dict = merged
    # add nodes
    for node in (u, v):
        if node not in self.succ:
            self.succ[node] = self.adjlist_dict_factory()
            self.pred[node] = self.adjlist_dict_factory()
            self.node[node] = {}
    if v in self.succ[u]:
        # succ[u][v] and pred[v][u] share one key dictionary, so updating
        # it here is visible from both directions.
        keydict = self.adj[u][v]
        if key is None:
            # find a unique integer key
            # other methods might be better here?
            key = len(keydict)
            while key in keydict:
                key += 1
        # A new multiedge gets an edge *attribute* dict, the same kind of
        # container the first edge between u and v receives below.
        datadict = keydict.get(key, self.edge_attr_dict_factory())
        datadict.update(attr_dict)
        keydict[key] = datadict
    else:
        # selfloops work this way without special treatment
        if key is None:
            key = 0
        datadict = self.edge_attr_dict_factory()
        datadict.update(attr_dict)
        keydict = self.edge_key_dict_factory()
        keydict[key] = datadict
        self.succ[u][v] = keydict
        self.pred[v][u] = keydict
def add_edges_from(self, ebunch_to_add, **attr):
    """Add every edge of `ebunch_to_add` to the graph.

    Parameters
    ----------
    ebunch_to_add : container of edges
        Each edge must be a 2-tuple (u, v) or a 3-tuple (u, v, d), where
        d is a dictionary containing edge data.
    attr : keyword arguments, optional
        Edge data (or labels or objects) applied to every added edge.

    Raises
    ------
    NetworkXError
        If an edge is neither a 2-tuple nor a 3-tuple.

    See Also
    --------
    add_edge : add a single edge
    add_weighted_edges_from : convenient way to add weighted edges

    Notes
    -----
    Adding the same edge twice has no effect but any edge data
    will be updated when each duplicate edge is added.

    Edge attributes specified in an ebunch take precedence over
    attributes specified via keyword arguments.

    Examples
    --------
    >>> G = nx.Graph()   # or DiGraph, MultiGraph, MultiDiGraph, etc
    >>> G.add_edges_from([(0, 1), (1, 2)]) # using a list of edge tuples
    >>> e = zip(range(0, 3), range(1, 4))
    >>> G.add_edges_from(e) # Add the path graph 0-1-2-3

    Associate data to edges

    >>> G.add_edges_from([(1, 2), (2, 3)], weight=3)
    >>> G.add_edges_from([(3, 4), (1, 4)], label='WN2898')
    """
    for edge in ebunch_to_add:
        size = len(edge)
        if size == 2:
            u, v = edge
            edge_data = {}
        elif size == 3:
            u, v, edge_data = edge
        else:
            raise NetworkXError(
                "Edge tuple %s must be a 2-tuple or 3-tuple." % (edge, ))
        # Register unseen endpoints, source first.
        for endpoint in (u, v):
            if endpoint not in self._succ:
                self._succ[endpoint] = self.adjlist_inner_dict_factory()
                self._pred[endpoint] = self.adjlist_inner_dict_factory()
                self._node[endpoint] = self.node_attr_dict_factory()
        # Reuse the existing attribute dict of a duplicate edge.
        fresh = self.edge_attr_dict_factory()
        datadict = self._adj[u].get(v, fresh)
        # Keyword attributes first, so per-edge data takes precedence.
        datadict.update(attr)
        datadict.update(edge_data)
        self._succ[u][v] = datadict
        self._pred[v][u] = datadict
def data_to_graph6(data):
    """Convert a sequence of 6-bit integers to a graph6 character string.

    Every unit must lie within 0..63; it is encoded as the character with
    code ``unit + 63``.  A NetworkXError is raised for out-of-range units.
    """
    if len(data) > 0:
        if min(data) < 0 or max(data) > 63:
            raise NetworkXError("graph6 data units must be within 0..63")
    encoded = (chr(unit + 63) for unit in data)
    return ''.join(encoded)
def incidence_matrix(G, nodelist=None, edgelist=None,
                     oriented=False, weight=None):
    """Return incidence matrix of G.

    The incidence matrix assigns each row to a node and each column to an
    edge.  For a standard incidence matrix a 1 appears wherever a row's
    node is incident on the column's edge.  For an oriented incidence
    matrix each edge is assigned an orientation (arbitrarily for
    undirected and aligning to direction for directed).  A -1 appears for
    the tail of an edge and 1 for the head of the edge.  The elements are
    zero otherwise.

    Parameters
    ----------
    G : graph
       A NetworkX graph

    nodelist : list, optional   (default= all nodes in G)
       The rows are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().

    edgelist : list, optional (default= all edges in G)
       The columns are ordered according to the edges in edgelist.
       If edgelist is None, then the ordering is produced by G.edges().

    oriented: bool, optional (default=False)
       If True, matrix elements are +1 or -1 for the head or tail node
       respectively of each edge.  If False, +1 occurs at both nodes.

    weight : string or None, optional (default=None)
       The edge data key used to provide each value in the matrix.
       If None, then each edge has weight 1.  Edge weights, if used,
       should be positive so that the orientation can provide the sign.

    Returns
    -------
    A : NumPy matrix
      The incidence matrix of G.

    Raises
    ------
    ImportError
      If NumPy is not available.
    NetworkXError
      If an edge of `edgelist` has an endpoint that is not in `nodelist`.

    Notes
    -----
    For MultiGraph/MultiDiGraph, the edges in edgelist should be
    (u,v,key) 3-tuples.

    Self loops produce an all-zero column.

    "Networks are the best discrete model for so many problems in
    applied mathematics" [1]_.

    References
    ----------
    .. [1] Gil Strang, Network applications: A = incidence matrix,
       http://academicearth.org/lectures/network-applications-incidence-matrix
    """
    try:
        import numpy as np
    except ImportError:
        raise ImportError(
            "incidence_matrix() requires numpy: http://scipy.org/ ")

    if nodelist is None:
        nodelist = G.nodes()
    if edgelist is None:
        if G.is_multigraph():
            edgelist = G.edges(keys=True)
        else:
            edgelist = G.edges()
    A = np.zeros((len(nodelist), len(edgelist)))
    node_index = dict((node, i) for i, node in enumerate(nodelist))
    # The graph type is only needed to look up weights; ask once, not
    # once per edge.
    keyed_edges = weight is not None and G.is_multigraph()
    for ei, e in enumerate(edgelist):
        (u, v) = e[:2]
        if u == v:
            continue  # self loops give zero column
        try:
            ui = node_index[u]
            vi = node_index[v]
        except KeyError:
            raise NetworkXError(
                'node %s or %s in edgelist but not in nodelist' % (u, v))
        if weight is None:
            wt = 1
        elif keyed_edges:
            ekey = e[2]
            wt = G[u][v][ekey].get(weight, 1)
        else:
            wt = G[u][v].get(weight, 1)
        # The first endpoint is the tail: negative only when oriented.
        A[ui, ei] = -wt if oriented else wt
        A[vi, ei] = wt
    return np.asmatrix(A)
def triangular_lattice_graph(m, n, periodic=False, with_positions=True,
                             create_using=None):
    r"""Returns the $m$ by $n$ triangular lattice graph.

    The `triangular lattice graph`_ is a two-dimensional `grid graph`_ in
    which each square unit has a diagonal edge (each grid unit has a chord).

    The returned graph has $m$ rows and $n$ columns of triangles. Rows and
    columns include both triangles pointing up and down. Rows form a strip
    of constant height. Columns form a series of diamond shapes, staggered
    with the columns on either side. Another way to state the size is that
    the nodes form a grid of `m+1` rows and `(n + 1) // 2` columns.
    The odd row nodes are shifted horizontally relative to the even rows.

    Directed graph types have edges pointed up or right.

    Positions of nodes are computed by default, i.e. whenever
    `with_positions` is true. The position of each node (embedded in a
    euclidean plane) is stored in the graph using equilateral triangles
    with sidelength 1. The height between rows of nodes is thus
    $\sqrt{3}/2$. Nodes lie in the first quadrant with the node $(0, 0)$
    at the origin.

    .. _triangular lattice graph: http://mathworld.wolfram.com/TriangularGrid.html
    .. _grid graph: http://www-cs-students.stanford.edu/~amitp/game-programming/grids/
    .. _Triangular Tiling: https://en.wikipedia.org/wiki/Triangular_tiling

    Parameters
    ----------
    m : int
        The number of rows in the lattice.

    n : int
        The number of columns in the lattice.

    periodic : bool (default: False)
        If true, join the boundary vertices of the grid using periodic
        boundary conditions. The join between boundaries is the final row
        and column of triangles. This means there is one row and one column
        fewer nodes for the periodic lattice. Periodic lattices require
        `m >= 3`, `n >= 5` and are allowed but misaligned if `m` or `n`
        are odd.

    with_positions : bool (default: True)
        Store the coordinates of each node in the graph node attribute 'pos'.
        The coordinates provide a lattice with equilateral triangles.
        Periodic positions shift the nodes vertically in a nonlinear way so
        the edges don't overlap so much.

    create_using : NetworkX graph constructor, optional (default=nx.Graph)
        Graph type to create. If graph instance, then cleared before populated.

    Returns
    -------
    NetworkX graph
        The *m* by *n* triangular lattice graph.

    Raises
    ------
    NetworkXError
        If `periodic` is true and `n < 5` or `m < 3`.
    """
    H = empty_graph(0, create_using)
    if n == 0 or m == 0:
        return H
    if periodic:
        if n < 5 or m < 3:
            msg = f"m > 2 and n > 4 required for periodic. m={m}, n={n}"
            raise NetworkXError(msg)

    N = (n + 1) // 2  # number of nodes in row
    rows = range(m + 1)
    cols = range(N + 1)
    # Make grid
    H.add_edges_from(((i, j), (i + 1, j)) for j in rows for i in cols[:N])
    H.add_edges_from(((i, j), (i, j + 1)) for j in rows[:m] for i in cols)
    # add diagonals
    H.add_edges_from(((i, j), (i + 1, j + 1))
                     for j in rows[1:m:2] for i in cols[:N])
    H.add_edges_from(((i + 1, j), (i, j + 1))
                     for j in rows[:m:2] for i in cols[:N])
    # identify boundary nodes if periodic
    # Tested by truth value, exactly like the validation above, so that
    # any truthy flag (1, numpy.bool_, ...) builds the periodic lattice it
    # was validated for.
    if periodic:
        for i in cols:
            H = contracted_nodes(H, (i, 0), (i, m))
        for j in rows[:m]:
            H = contracted_nodes(H, (0, j), (N, j))
    elif n % 2:
        # remove extra nodes
        H.remove_nodes_from((N, j) for j in rows[1::2])

    # Add position node attributes
    if with_positions:
        h = sqrt(3) / 2  # vertical distance between rows of nodes
        # Odd rows are shifted half a unit to the right.
        pos = {(i, j): (0.5 * (j % 2) + i, h * j)
               for i in cols for j in rows if (i, j) in H}
        set_node_attributes(H, pos, 'pos')
    return H
def pagerank_scipy(G, sim_mat, alpha, beta, personalization=None,
                   max_iter=100, tol=1.0e-6, weight='weight',
                   dangling=None):
    """Return PageRank computed on a transition matrix blended with `sim_mat`.

    The row-normalized adjacency matrix ``M`` of `G` is replaced by
    ``beta * M + (1 - beta) * sim_mat`` before the power iteration.  The
    blended matrix is not re-normalized; instead the iterate is rescaled
    to sum to 1 after every step.

    Parameters
    ----------
    G : graph
      A NetworkX graph.

    sim_mat : matrix
      Matrix that is mixed into the transition matrix.
      NOTE(review): presumably an N x N (sparse) matrix whose rows and
      columns follow the node order of `G`; confirm with the caller.

    alpha : float
      Damping parameter for PageRank.

    beta : float
      Weight of the graph transition matrix in the blend; `sim_mat`
      receives weight ``1 - beta``.

    personalization : dict, optional
      Dictionary with a value for every graph node; normalized to sum
      to 1.  By default a uniform distribution is used.

    max_iter : integer, optional
      Maximum number of power-method iterations.

    tol : float, optional
      Error tolerance; the iteration stops once the l1 change between two
      successive iterates is below ``len(G) * tol``.

    weight : key, optional
      Edge data key to use as weight.

    dangling : dict, optional
      Dictionary with a value for every graph node giving the out-edge
      weights assigned to dangling nodes; normalized to sum to 1.  By
      default the personalization vector is used.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value.  If the iteration did
       not converge, the last iterate is returned and a warning is issued.

    Raises
    ------
    NetworkXError
       If `personalization` or `dangling` lacks an entry for some node.
    """
    import warnings

    # The array helpers come from NumPy directly: the ``scipy.array`` /
    # ``scipy.repeat`` / ``scipy.where`` / ``scipy.absolute`` aliases are
    # deprecated re-exports of the NumPy functions.
    import numpy as np
    import scipy.sparse

    N = len(G)
    if N == 0:
        return {}

    nodelist = list(G)
    M = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight,
                                  dtype=float)
    # Row-normalize M.  S holds the reciprocal of every non-zero row sum;
    # rows that sum to zero (dangling nodes) keep a zero entry.
    S = np.array(M.sum(axis=1)).flatten()
    S[S != 0] = 1.0 / S[S != 0]
    Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
    M = Q * M

    # initial vector
    x = np.repeat(1.0 / N, N)

    # Personalization vector
    if personalization is None:
        p = np.repeat(1.0 / N, N)
    else:
        missing = set(nodelist) - set(personalization)
        if missing:
            raise NetworkXError('Personalization vector dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        p = np.array([personalization[n] for n in nodelist], dtype=float)
        p = p / p.sum()

    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        missing = set(nodelist) - set(dangling)
        if missing:
            raise NetworkXError('Dangling node dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = np.array([dangling[n] for n in nodelist],
                                    dtype=float)
        dangling_weights /= dangling_weights.sum()
    is_dangling = np.where(S == 0)[0]

    # Blend the transition matrix with the similarity matrix.
    M = beta * M + (1 - beta) * sim_mat

    # power iteration: make up to max_iter iterations
    err = float('inf')
    for _ in range(max_iter):
        xlast = x
        x = alpha * (x * M + x[is_dangling].sum() * dangling_weights) + \
            (1 - alpha) * p
        # The blended matrix is not row-stochastic, so rescale the iterate.
        x = x / x.sum()
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < N * tol:
            return dict(zip(nodelist, map(float, x)))

    # TODO: handle non-convergence properly.  For now this is deliberately
    # best-effort: report the failure and return the last iterate.
    warnings.warn('pagerank_scipy: power iteration failed to converge '
                  'in %d iterations (err=%g, threshold=%g); returning the '
                  'last iterate.' % (max_iter, err, N * tol))
    return dict(zip(nodelist, map(float, x)))
def add_edge(self, u, v, attr_dict=None, **attr):
    """Add an edge between u and v.

    The nodes u and v will be automatically added if they are
    not already in the graph.

    Edge attributes can be specified with keywords or by providing
    a dictionary with key/value pairs.  See examples below.

    Parameters
    ----------
    u,v : nodes
        Nodes can be, for example, strings or numbers.
        Nodes must be hashable (and not None) Python objects.
    attr_dict : dictionary, optional (default= no attributes)
        Dictionary of edge attributes.  Key/value pairs will
        update existing data associated with the edge.  The dictionary
        itself is not modified.
    attr : keyword arguments, optional
        Edge data (or labels or objects) can be assigned using
        keyword arguments.  Keywords override entries of `attr_dict`.

    Raises
    ------
    NetworkXError
        If `attr_dict` is not a dictionary.

    See Also
    --------
    add_edges_from : add a collection of edges

    Notes
    -----
    Adding an edge that already exists updates the edge data.

    Many NetworkX algorithms designed for weighted graphs use as
    the edge weight a numerical value assigned to a keyword
    which by default is 'weight'.

    Examples
    --------
    The following all add the edge e=(1,2) to graph G:

    >>> G = nx.Graph()   # or DiGraph, MultiGraph, MultiDiGraph, etc
    >>> e = (1,2)
    >>> G.add_edge(1, 2)           # explicit two-node form
    >>> G.add_edge(*e)             # single edge as tuple of two nodes
    >>> G.add_edges_from( [(1,2)] ) # add edges from iterable container

    Associate data to edges using keywords:

    >>> G.add_edge(1, 2, weight=3)
    >>> G.add_edge(1, 3, weight=7, capacity=15, length=342.7)
    """
    # set up attribute dict
    if attr_dict is None:
        attr_dict = attr
    else:
        # Merge into a copy so the caller's dictionary is not mutated.
        try:
            merged = attr_dict.copy()
            merged.update(attr)
        except AttributeError:
            raise NetworkXError(
                "The attr_dict argument must be a dictionary.")
        attr_dict = merged
    # add nodes
    for node in (u, v):
        if node not in self.succ:
            self.succ[node] = {}
            self.pred[node] = {}
            self.node[node] = {}
    # add the edge; succ[u][v] and pred[v][u] share one attribute dict
    datadict = self.adj[u].get(v, {})
    datadict.update(attr_dict)
    self.succ[u][v] = datadict
    self.pred[v][u] = datadict
def parse_gml_lines(lines, label, destringizer):
    """Parse GML `lines` into a graph.

    Parameters
    ----------
    lines : iterable of strings
        The GML source, one string per line.

    label : string or None
        Name of the node attribute used to rename the parsed nodes.  If
        None or ``"id"``, the nodes keep their GML ids.

    destringizer : callable or None
        If given, it is applied to every string value; a `ValueError`
        raised by it leaves the string unchanged.

    Returns
    -------
    G : NetworkX graph
        A (Multi)(Di)Graph, depending on the ``directed`` and
        ``multigraph`` keys of the parsed graph.

    Raises
    ------
    NetworkXError
        On any tokenizing or structural error in the input: no or several
        graphs, missing ``id``/label/``source``/``target`` attributes,
        duplicated node ids, node labels or edges, or edges that refer to
        undefined nodes.
    """

    def tokenize():
        # One capturing group per pattern; the index of the group that
        # matched selects the token category.
        patterns = [
            r"[A-Za-z][0-9A-Za-z_]*\b",  # keys
            # reals
            r"[+-]?(?:[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)(?:[Ee][+-]?[0-9]+)?",
            r"[+-]?[0-9]+",  # ints
            r'".*?"',  # strings
            r"\[",  # dict start
            r"\]",  # dict end
            r"#.*$|\s+",  # comments and whitespaces
        ]
        tokens = re.compile("|".join(f"({pattern})" for pattern in patterns))
        lineno = 0
        for line in lines:
            length = len(line)
            pos = 0
            while pos < length:
                match = tokens.match(line, pos)
                if match is None:
                    m = f"cannot tokenize {line[pos:]} at ({lineno + 1}, {pos + 1})"
                    raise NetworkXError(m)
                for i in range(len(patterns)):
                    group = match.group(i + 1)
                    if group is not None:
                        if i == 0:  # keys
                            value = group.rstrip()
                        elif i == 1:  # reals
                            value = float(group)
                        elif i == 2:  # ints
                            value = int(group)
                        else:
                            value = group
                        # Comments and whitespace are consumed but never
                        # yielded; positions are reported 1-based.
                        if i != 6:  # comments and whitespaces
                            yield Token(Pattern(i), value, lineno + 1, pos + 1)
                        pos += len(group)
                        break
            lineno += 1
        # A token whose category is None marks the end of the input.
        yield Token(None, None, lineno + 1, 1)  # EOF

    def unexpected(curr_token, expected):
        # Always raises; a token value of None is reported as "EOF".
        category, value, lineno, pos = curr_token
        value = repr(value) if value is not None else "EOF"
        raise NetworkXError(
            f"expected {expected}, found {value} at ({lineno}, {pos})")

    def consume(curr_token, category, expected):
        # Return the next token if the current one has the wanted category,
        # otherwise raise through unexpected().
        if curr_token.category == category:
            return next(tokens)
        unexpected(curr_token, expected)

    def parse_kv(curr_token):
        # Read consecutive "key value" pairs.  Repeated keys accumulate
        # their values in a list.
        dct = defaultdict(list)
        while curr_token.category == Pattern.KEYS:
            key = curr_token.value
            curr_token = next(tokens)
            category = curr_token.category
            if category == Pattern.REALS or category == Pattern.INTS:
                value = curr_token.value
                curr_token = next(tokens)
            elif category == Pattern.STRINGS:
                # Strip the surrounding double quotes before unescaping.
                value = unescape(curr_token.value[1:-1])
                if destringizer:
                    try:
                        value = destringizer(value)
                    except ValueError:
                        pass
                curr_token = next(tokens)
            elif category == Pattern.DICT_START:
                curr_token, value = parse_dict(curr_token)
            else:
                # Allow for string convertible id and label values
                if key in ("id", "label", "source", "target"):
                    try:
                        # String convert the token value
                        value = unescape(str(curr_token.value))
                        if destringizer:
                            try:
                                value = destringizer(value)
                            except ValueError:
                                pass
                        curr_token = next(tokens)
                    except Exception:
                        msg = ("an int, float, string, '[' or string" +
                               " convertable ASCII value for node id or label")
                        unexpected(curr_token, msg)
                else:
                    # Otherwise error out
                    unexpected(curr_token, "an int, float, string or '['")
            dct[key].append(value)

        def clean_dict_value(value):
            # Undo the list wrapping added above: a single value is
            # returned bare, and a list that starts with LIST_START_VALUE
            # is returned without that leading marker.
            if not isinstance(value, list):
                return value
            if len(value) == 1:
                return value[0]
            if value[0] == LIST_START_VALUE:
                return value[1:]
            return value

        dct = {key: clean_dict_value(value) for key, value in dct.items()}
        return curr_token, dct

    def parse_dict(curr_token):
        # dict start
        curr_token = consume(curr_token, Pattern.DICT_START, "'['")
        # dict contents
        curr_token, dct = parse_kv(curr_token)
        # dict end
        curr_token = consume(curr_token, Pattern.DICT_END, "']'")
        return curr_token, dct

    def parse_graph():
        # The whole input must be key/value pairs holding one "graph" entry.
        curr_token, dct = parse_kv(next(tokens))
        if curr_token.category is not None:  # EOF
            unexpected(curr_token, "EOF")
        if "graph" not in dct:
            raise NetworkXError("input contains no graph")
        graph = dct["graph"]
        # parse_kv only leaves a list here when "graph" occurred repeatedly.
        if isinstance(graph, list):
            raise NetworkXError("input contains more than one graph")
        return graph

    tokens = tokenize()
    graph = parse_graph()

    # The "directed" and "multigraph" entries select the graph class.
    directed = graph.pop("directed", False)
    multigraph = graph.pop("multigraph", False)
    if not multigraph:
        G = nx.DiGraph() if directed else nx.Graph()
    else:
        G = nx.MultiDiGraph() if directed else nx.MultiGraph()
    # Every remaining entry except the node/edge lists is a graph attribute.
    graph_attr = {k: v for k, v in graph.items() if k not in ("node", "edge")}
    G.graph.update(graph_attr)

    def pop_attr(dct, category, attr, i):
        # Remove and return a required attribute; a missing one is
        # reported with the category and index of the offending item.
        try:
            return dct.pop(attr)
        except KeyError as e:
            raise NetworkXError(
                f"{category} #{i} has no {attr!r} attribute") from e

    nodes = graph.get("node", [])
    mapping = {}  # GML id -> value of the `label` attribute
    node_labels = set()
    # A single node is stored as a bare dict rather than a list.
    for i, node in enumerate(nodes if isinstance(nodes, list) else [nodes]):
        id = pop_attr(node, "node", "id", i)
        if id in G:
            raise NetworkXError(f"node id {id!r} is duplicated")
        if label is not None and label != "id":
            node_label = pop_attr(node, "node", label, i)
            if node_label in node_labels:
                raise NetworkXError(f"node label {node_label!r} is duplicated")
            node_labels.add(node_label)
            mapping[id] = node_label
        # Whatever is left in the node dict becomes node attributes.
        G.add_node(id, **node)

    edges = graph.get("edge", [])
    # A single edge is stored as a bare dict rather than a list.
    for i, edge in enumerate(edges if isinstance(edges, list) else [edges]):
        source = pop_attr(edge, "edge", "source", i)
        target = pop_attr(edge, "edge", "target", i)
        if source not in G:
            raise NetworkXError(f"edge #{i} has undefined source {source!r}")
        if target not in G:
            raise NetworkXError(f"edge #{i} has undefined target {target!r}")
        if not multigraph:
            if not G.has_edge(source, target):
                G.add_edge(source, target, **edge)
            else:
                arrow = "->" if directed else "--"
                msg = f"edge #{i} ({source!r}{arrow}{target!r}) is duplicated"
                raise nx.NetworkXError(msg)
        else:
            # Only an explicit key can collide with an existing edge.
            key = edge.pop("key", None)
            if key is not None and G.has_edge(source, target, key):
                arrow = "->" if directed else "--"
                msg = f"edge #{i} ({source!r}{arrow}{target!r}, {key!r})"
                msg2 = 'Hint: If multigraph add "multigraph 1" to file header.'
                raise nx.NetworkXError(msg + " is duplicated\n" + msg2)
            G.add_edge(source, target, key, **edge)

    # Rename the nodes from their GML ids to the collected label values.
    if label is not None and label != "id":
        G = nx.relabel_nodes(G, mapping)
    return G
def predecessors_iter(self, n):
    """Return an iterator over the predecessor nodes of n.

    The predecessors are looked up in ``self.pred``.

    Raises
    ------
    NetworkXError
        If ``n`` is not a key of ``self.pred``.
    """
    # Keep the try body down to the single lookup that can raise KeyError.
    try:
        incoming = self.pred[n]
    except KeyError:
        raise NetworkXError("The node %s is not in the digraph." % (n, ))
    return iter(incoming)
def page_rank(G, alpha=0.85, max_iter=100, tol=1.0e-4, nstart=None):
    """Return a dictionary keyed by node of the PageRank of G.

    PageRank computes the largest eigenvector of the stochastic
    adjacency matrix of G.  The eigenvector calculation is done by the
    power iteration method and has no guarantee of convergence.

    This is a pure Python implementation.

    Parameters
    ----------
    G : graph
        The input graph.  A copy in (right) stochastic form is obtained
        with ``stochastic(G)``; G itself is not iterated on directly.
    alpha : float, optional
        Damping factor applied to the link-following and dangling terms;
        ``1 - alpha`` weights the uniform teleport term.
    max_iter : int, optional
        Maximum number of power iterations.
    tol : float, optional
        The iteration stops as soon as the l1 distance between two
        successive (normalized) vectors is below ``tol``.
        NOTE(review): the threshold is ``tol`` itself, it is not scaled
        by the number of nodes -- confirm this is the intended criterion.
    nstart : dict, optional
        Starting vector keyed by node.  It is copied and normalized to
        sum to 1; the caller's dictionary is left untouched.  By default
        a uniform vector over the nodes is used.

    Returns
    -------
    dict
        Node -> PageRank value, normalized to sum to 1.  An empty
        dictionary is returned for a graph without nodes.

    Raises
    ------
    TypeError
        If G has a true ``multiedges`` attribute.
    NetworkXError
        If the iteration does not converge within ``max_iter`` iterations.
    """
    if getattr(G, "multiedges", False):
        raise TypeError("page_rank not valid for graphs with multiedges.")

    # create a copy in (right) stochastic form
    W = stochastic(G)
    nnodes = W.number_of_nodes()
    if nnodes == 0:
        # Nothing to rank; also avoids dividing by zero below.
        return {}

    # choose fixed starting vector if not given
    if nstart is None:
        x = dict.fromkeys(W, 1.0 / nnodes)
    else:
        # Work on a copy so the normalization does not mutate the argument.
        x = dict(nstart)
    # normalize starting vector to 1
    s = 1.0 / sum(x.values())
    for k in x:
        x[k] *= s

    # "dangling" nodes, no links out from them
    # (reads W.adj directly: XGraph internals exposed)
    dangle = [n for n in W if sum(W.adj[n].values()) == 0.0]

    # pagerank power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast, 0)
        danglesum = alpha / nnodes * sum(xlast[n] for n in dangle)
        teleportsum = (1.0 - alpha) / nnodes * sum(xlast.values())
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T=xlast^T*W
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W.adj[n][nbr]
            x[n] += danglesum + teleportsum
        # normalize vector to 1
        s = 1.0 / sum(x.values())
        for n in x:
            x[n] *= s
        # check convergence, l1 norm
        err = sum(abs(x[n] - xlast[n]) for n in x)
        if err < tol:
            return x

    # Report max_iter rather than the loop index, which is unbound when
    # max_iter is 0.
    raise NetworkXError(
        "page_rank: power iteration failed to converge in %d iterations."
        % max_iter)