def __init__(self, network: Network, weight: Weight = None,
             start_node: Optional[str] = None, restart_prob=0) -> None:
    """Initialise a random walk process on ``network``.

    The time of the walk is set to zero, the transition matrix and its
    stationary distribution are computed, and the walker is placed on
    ``start_node``. If ``start_node`` is omitted, or is not a node of
    ``network``, a node is drawn uniformly at random instead (a warning is
    logged in the second case).

    Parameters
    ----------
    network : Network
        Network in which the random walk is simulated.
    weight : Weight
        Forwarded unchanged to ``RandomWalk.transition_matrix``.
    start_node : Optional[str]
        Uid of the node in which the walker starts.
    restart_prob : float
        Forwarded unchanged to ``RandomWalk.transition_matrix``.
    """
    # network in which the random walk is simulated
    self._network: Network = network

    # time of the random walk
    self._t: int = 0

    # transition matrix for the random walk
    self._transition_matrix = RandomWalk.transition_matrix(
        network, weight, restart_prob)

    # uids of the nodes
    self._node_uids: list = list(network.nodes.keys())

    # number of visits per node, addressed by node index
    self._visitations = np.zeros(network.number_of_nodes())

    # TODO: implement new path class

    # The stationary distribution is the leading left eigenvector of the
    # transition matrix, i.e. the leading eigenvector of its transpose.
    if network.number_of_nodes() > 2:
        _, eigenvectors = spl.eigs(
            self._transition_matrix.transpose(), k=1, which='LM')
        pi = eigenvectors.reshape(eigenvectors.size, )
    else:
        # the sparse solver cannot be used for such small matrices
        eigenvals, eigenvectors = spla.eig(
            self._transition_matrix.transpose().toarray())
        # eig() stores eigenvectors as COLUMNS, so the vector belonging to
        # the largest eigenvalue is selected by column index; indexing rows
        # would only shuffle the entries of an arbitrary eigenvector.
        leading = np.argsort(-eigenvals)[0]
        pi = eigenvectors[:, leading]

    # stationary probabilities (normalised to sum to one)
    self._stationary_probabilities = np.real(pi / np.sum(pi))

    if start_node is not None and start_node in network.nodes:
        self._current_node = start_node
    else:
        if start_node is not None:
            LOG.warning('Invalid start node for random walk. '
                        'Picking random node.')
        self._current_node = np.random.choice(self._node_uids)

    # the start node counts as the first visit
    self._visitations[network.nodes.index[self._current_node]] += 1
def ER_np_randomize(network: Network, loops: bool = False) -> Network:
    """Generate a random microstate based on the G(n,p) model.

    The number of nodes, the expected number of edges, the edge directedness
    and the node uids of the generated network match the corresponding values
    of the given network instance.

    Parameters
    ----------
    network : Network
        Network whose size, directedness and node uids are reproduced.
    loops : bool
        Whether the generated network may contain loops.

    Returns
    -------
    Network
        The network returned by ``ER_np`` for the derived edge probability.
    """
    n = network.number_of_nodes()
    m = network.number_of_edges()
    M = max_edges(n, directed=network.directed, loops=loops)

    # If no node pair can carry an edge at all (e.g. a single node without
    # loops) the ratio is undefined; an edge probability of zero is the only
    # consistent choice and avoids a ZeroDivisionError.
    p = m / M if M > 0 else 0.0

    return ER_np(n=n, p=p, directed=network.directed, loops=loops,
                 node_uids=list(network.nodes.uids))
def to_network(frame: pd.DataFrame, loops: bool = True, directed: bool = True,
               multiedges: bool = False, **kwargs: Any) -> Network:
    """Build a network from the rows of a pandas data frame.

    Columns ``v`` and ``w`` hold the edge endpoints (synonym columns are
    resolved through ``_check_column_name``), an optional column ``uid``
    holds the edge uid, and every remaining column is passed to the edge as
    a keyword argument. ``kwargs`` are forwarded to the ``Network``
    constructor.

    Rows repeating an already seen node pair are skipped with a warning
    unless ``multiedges`` is set; rows with ``v == w`` are skipped silently
    unless ``loops`` is set.

    Raises
    ------
    IOError
        If ``None`` occurs among the endpoint values.
    """
    # if no v/w columns are included, pick first synonym
    frame = _check_column_name(frame, 'v', config['edge']['v_synonyms'])
    frame = _check_column_name(frame, 'w', config['edge']['w_synonyms'])

    LOG.debug('Creating %s network', directed)

    endpoints = set(frame['v']).union(set(frame['w']))
    if None in endpoints:
        LOG.error('DataFrame minimally needs columns \'v\' and \'w\'')
        raise IOError

    nodes = {label: Node(label) for label in endpoints}
    edges: list = []
    seen_pairs: set = set()

    # TODO: Make this for loop faster!
    for record in frame.to_dict(orient='records'):
        v = record.pop('v')
        w = record.pop('w')
        uid = record.pop('uid', None)

        if not multiedges and (v, w) in seen_pairs:
            LOG.warning(
                'The edge (%s,%s) exist already '
                'and will not be considered. '
                'To capture this edge, please '
                'enalbe multiedges and/or directed!', v, w)
            continue

        if v == w and not loops:
            continue

        # whatever is left in the record becomes an edge attribute
        edges.append(Edge(nodes[v], nodes[w], uid=uid, **record))
        seen_pairs.add((v, w))
        if not directed:
            # an undirected edge blocks the reversed pair as well
            seen_pairs.add((w, v))

    net = Network(directed=directed, multiedges=multiedges, **kwargs)
    for node in nodes.values():
        net.nodes.add(node)
    for edge in edges:
        net.edges._add(edge)
    net._add_edge_properties()
    return net
def Q_max_modularity(network: Network, cluster_mapping: Dict) -> float:
    """Compute the maximum theoretically possible Q-modularity.

    The value is ``(2m - sum_{v,w in same cluster} d_v * d_w / (2m)) / (2m)``
    where ``m`` is the number of edges and ``d`` the node degrees.

    Parameters
    ----------
    network : Network
        The network for which the bound is computed.
    cluster_mapping : Dict
        Maps every node uid to its cluster label. Labels are used as
        dictionary keys here and therefore have to be hashable.

    Returns
    -------
    float
        The maximum Q-modularity for the given cluster mapping.

    Raises
    ------
    ZeroDivisionError
        If the network has no edges.
    """
    m = network.number_of_edges()

    # fetch the degrees once instead of once per node pair
    degrees = network.degrees()

    # The sum of d_v * d_w over all ordered pairs inside one cluster equals
    # the squared total degree of that cluster, which turns the quadratic
    # pair loop into a single pass over the nodes.
    cluster_degree: dict = {}
    for v in network.nodes.uids:
        label = cluster_mapping[v]
        cluster_degree[label] = cluster_degree.get(label, 0) + degrees[v]

    same_cluster = sum(total * total for total in cluster_degree.values())
    qmax = 2 * m - same_cluster / (2 * m)
    return qmax / (2 * m)
def Q_modularity(network: Network, cluster_mapping: Dict) -> float:
    """Compute the Q-modularity of a network for a given cluster mapping.

    For every ordered node pair ``(v, w)`` in the same cluster the term
    ``A[v, w] - d_v * d_w / (2m)`` is accumulated and the sum is divided by
    ``2m``, where ``A`` is the adjacency matrix, ``d`` the node degrees and
    ``m`` the number of edges.

    Parameters
    ----------
    network : Network
        The network for which the modularity is computed.
    cluster_mapping : Dict
        Maps every node uid to its cluster label.

    Returns
    -------
    float
        The Q-modularity of the cluster mapping.

    Raises
    ------
    ZeroDivisionError
        If the network has no edges.
    """
    A = network.adjacency_matrix()
    m = network.number_of_edges()

    # loop invariants: previously recomputed for every node pair
    degrees = network.degrees()
    index = network.nodes.index
    uids = list(network.nodes.uids)

    q = 0.0
    for v in uids:
        cluster_v = cluster_mapping[v]
        degree_v = degrees[v]
        row = index[v]
        for w in uids:
            if cluster_v == cluster_mapping[w]:
                q += A[row, index[w]] - degree_v * degrees[w] / (2 * m)
    return q / (2 * m)
def ER_nm_randomize(network: Network, loops: bool = False,
                    multiedges: bool = False) -> Union[Network, None]:
    """Generate a randomized G(n,m) version of a given network.

    The number of nodes, the number of edges, the edge directedness and the
    node uids are read from ``network`` and handed to ``ER_nm``.

    Parameters
    ----------
    network : pathpy.Network
        Given network used to determine number of nodes, edges, node uids,
        and edge directedness
    loops : bool
        Whether or not the generated network can contain loops.
    multiedges : bool
        Whether or not multiple edges can be added to the same node pair

    Returns
    -------
    Union[Network, None]
        Whatever ``ER_nm`` returns for the derived parameters.

    Examples
    --------
    Randomize an undirected network with 5 nodes and 3 edges

    >>> import pathpy as pp
    >>> n = pp.Network(directed=False)
    >>> n.add_edge('a', 'b')
    >>> n.add_edge('b', 'c')
    >>> n.add_edge('d', 'e')
    >>> r = pp.generators.ER_nm_randomize(n)
    >>> print(r)
    Uid: 0x...
    Type: Network
    Directed: False
    Unique nodes: 5
    Unique edges: 3
    Unique paths: 0
    Total paths: 0

    >>> print(r.nodes.uids)
    { 'a', 'b', 'c', 'd', 'e'}
    """
    n_nodes = network.number_of_nodes()
    n_edges = network.number_of_edges()
    uids = list(network.nodes.uids)
    return ER_nm(n_nodes, n_edges, directed=network.directed, loops=loops,
                 multiedges=multiedges, node_uids=uids)
def _(self, data: PathCollection, order: Optional[int] = None) -> None:
    """Fit the model to a collection of paths.

    An explicitly given ``order`` replaces the stored one. Orders 0 and 1
    are delegated to the parent class; for orders above 1 the node
    collection is set up from the nodes and edges of ``data`` and the model
    is computed from a first-order network of the paths.

    Raises
    ------
    AttributeError
        If the order is neither in [0, 1] nor larger than 1.
    """
    if order is not None:
        self._order = order

    # orders zero and one are handled by the parent class
    if 0 <= self.order <= 1:
        super().fit(data, order=self.order)
        return

    if not self.order > 1:
        LOG.error('A Null Model with order %s is not supported',
                  self.order)
        raise AttributeError

    # collect the first-order nodes and edges of the data
    base_nodes = NodeCollection()
    for node in data.nodes.values():
        base_nodes.add(node)

    base_edges = EdgeCollection(nodes=base_nodes)
    for edge in data.edges.values():
        base_edges.add(edge)

    self._nodes = HigherOrderNodeCollection(nodes=base_nodes,
                                            edges=base_edges)

    # generate first order representation of data
    first_order = Network.from_paths(data, frequencies=True)
    self.calculate(first_order, data)
def single_source_shortest_paths(
        network: Network, source: str,
        weight: Union[bool, str, None] = None) -> Union[dict, np.array]:
    """Calculate shortest paths from a single source node.

    Uses Dijkstra's algorithm with a binary heap as priority queue.

    Parameters
    ----------
    network : Network
        The network in which paths are calculated.
    source : str
        Uid of the source node.
    weight : Union[bool, str, None]
        ``None``/``False``: every edge has cost 1. ``True``: the edge
        attribute ``'weight'`` is used as cost. A string: the edge attribute
        of that name is used as cost. For node pairs connected by several
        edges the first edge returned by ``network.edges[u, v]`` is used.

    Returns
    -------
    tuple
        ``(dist_arr, s_p)`` where ``dist_arr`` is a numpy array of distances
        addressed by node index (``np.inf`` for unreachable nodes) and
        ``s_p`` maps the uid of every node other than ``source`` to one
        shortest path, given as tuple of node uids starting at ``source``,
        or to ``None`` if the node cannot be reached.
    """
    # local import keeps the block self-contained
    import heapq

    # resolve the name of the cost attribute once instead of once per edge
    if isinstance(weight, str):
        cost_attribute = weight
    elif weight:
        cost_attribute = 'weight'
    else:
        cost_attribute = None

    dist: dict = {source: 0}
    prev: dict = {}
    position: dict = {}
    for pos, v in enumerate(network.nodes.uids):
        # ties between equally distant nodes are broken by iteration order
        position[v] = pos
        if v != source:
            dist[v] = np.inf
            prev[v] = None

    # a source that is not part of the network reaches nothing
    heap: list = []
    if source in position:
        heap.append((0, position[source], source))

    settled: set = set()
    while heap:
        _, _, u = heapq.heappop(heap)
        if u in settled:
            # outdated entry: u was already processed with a smaller distance
            continue
        settled.add(u)

        for v in network.successors[u]:
            if cost_attribute is None:
                # for networks with no edge costs, edges have constant cost
                cost = 1
            else:
                cost = list(network.edges[u, v])[0].attributes[cost_attribute]

            new_dist = dist[u] + cost
            if new_dist < dist[v.uid]:
                dist[v.uid] = new_dist
                prev[v.uid] = u
                # push a fresh entry instead of decreasing the key in place
                heapq.heappush(heap, (new_dist, position[v.uid], v.uid))

    # calculate distance vector
    dist_arr = np.zeros(network.number_of_nodes())
    for node in network.nodes:
        dist_arr[network.nodes.index[node.uid]] = dist[node.uid]

    # construct shortest paths by following the predecessors back to source
    s_p: dict = {}
    for dest in network.nodes:
        if dest.uid == source:
            continue
        path = [dest.uid]
        x = dest.uid
        while x != source and x is not None:
            x = prev[x]
            path.append(x)
        if x is None:
            s_p[dest.uid] = None
        else:
            path.reverse()
            s_p[dest.uid] = tuple(path)

    return dist_arr, s_p
def closeness_centrality(network: Network, normalized: bool = False) -> Dict:
    """Calculate the closeness centrality of all nodes.

    .. note::

        For every node the finite shortest path distances between it and all
        other nodes, taken from `shortest_paths.distance_matrix`, are summed
        up and the centrality is the inverse of that sum. Nodes with a sum of
        zero get centrality zero. For `normalized=True` the value is
        additionally multiplied by n-1 where n is the number of nodes in the
        network.

    Parameters
    ----------
    network : Network

        The :py:class:`Network` object that contains the network

    normalized : bool

        If True the resulting centralities are multiplied by n-1.

    Returns
    -------
    Dict
        Mapping of node uid to closeness centrality.

    Examples
    --------
    Compute closeness centrality in a simple network

    >>> import pathpy as pp
    >>> net = pp.Network(directed=False)
    >>> net.add_edge('a', 'x')
    >>> net.add_edge('x', 'b')
    >>> c = pp.algorithms.centralities.closeness_centrality(net)
    >>> c['a']
    0.3333333333333333
    """
    distances = shortest_paths.distance_matrix(network)
    closeness: defaultdict = defaultdict(float)
    uid_at = {position: uid for uid, position in network.nodes.index.items()}
    n = network.number_of_nodes()

    # accumulate the finite off-diagonal distances per node
    for d in range(n):
        for x in range(n):
            if d == x:
                continue
            length = distances[d, x]
            if length < np.inf:
                closeness[uid_at[x]] += length

    # invert the sums; nodes without any finite distance keep value zero
    for v in network.nodes.uids:
        value = closeness[v] + 0.0
        if value > 0.0:
            value = 1.0 / value
        if normalized:
            value *= n - 1
        closeness[v] = value

    return closeness
def degree_centrality(network: Network, mode: str = 'degree') -> dict:
    """Calculate the degree centrality of all nodes.

    Parameters
    ----------
    network : Network

        The :py:class:`Network` object that contains the network

    mode : str

        Can be chosen as 'degree', 'indegree', or 'outdegree'. Determines
        whether to calculate undirected/total degrees, indegrees, or
        outdegrees

    Returns
    -------
    dict
        Mapping of node uid to the requested degree.

    Raises
    ------
    KeyError
        If ``mode`` is none of the supported values.

    Examples
    --------
    Compute degree centrality in a simple network

    >>> import pathpy as pp
    >>> net = pp.Network(directed=True)
    >>> net.add_edge('a', 'x')
    >>> net.add_edge('x', 'b')
    >>> c = pp.algorithms.centralities.degree_centrality(net)
    >>> c['a']
    1

    >>> c = pp.algorithms.centralities.degree_centrality(net, mode='indegree')
    >>> c['a']
    0
    """
    if mode not in ('degree', 'indegree', 'outdegree'):
        LOG.error('Mode must be \'degree\', \'indegree\' or \'outdegree\'')
        raise KeyError(mode)

    # ask the network for the degrees once instead of once per node
    if mode == 'indegree':
        degrees = network.indegrees()
    elif mode == 'outdegree':
        degrees = network.outdegrees()
    else:
        degrees = network.degrees()

    return {v: degrees[v] for v in network.nodes.keys()}
def local_clustering_coefficient(network: Network, v: str) -> float:
    """Calculate the local clustering coefficient of a node in a network.

    The local clustering coefficient of any node with an (out-)degree
    smaller than two is defined as zero. For all other nodes, it is
    defined as:

        cc(c) := 2*k(i)/(d_i(d_i-1))

        or

        cc(c) := k(i)/(d_out_i(d_out_i-1))

    in undirected and directed networks respectively, where k(i) counts the
    edges between two different successors of the node.

    Parameters
    ----------
    network : Network

        The network in which to calculate the local clustering coefficient

    v : str

        The uid of the node for which the local clustering coefficient
        shall be calculated

    Returns
    -------
    float
        The local clustering coefficient of node ``v``.
    """
    total_degrees = network.degrees()
    out_degrees = network.outdegrees()

    # pick the degree and the prefactor that apply to this network type
    if network.directed:
        degree, factor = out_degrees[v], 1
    else:
        degree, factor = total_degrees[v], 2

    if degree < 2:
        return 0.

    # count the non-loop edges that connect two successors of v
    links: int = 0
    for edge in network.edges:
        if edge.v.uid == edge.w.uid:
            continue
        if (edge.v in network.successors[v]
                and edge.w in network.successors[v]):
            links += 1

    return factor * links / (degree * (degree - 1))
def Molloy_Reed_randomize(network: Network) -> Network:
    """Generate a random network with the degree sequence of ``network``.

    The degree sequence and the node uids of the given network are handed to
    ``Molloy_Reed``; the uids are arranged by node index so that they line
    up with the entries of the degree sequence.
    """
    # degrees are listed in order of node indices
    sequence = network.degree_sequence()

    # arrange the node uids in the same order; '-' marks unfilled slots
    index_of = network.nodes.index
    ordered_uids = ['-'] * len(sequence)
    for uid in network.nodes.uids:
        ordered_uids[index_of[uid]] = uid

    return Molloy_Reed(sequence, node_uids=ordered_uids)
def modularity_maximisation(network: Network,
                            iterations: int = 1000) -> Tuple[Dict, float]:
    """Greedy randomised modularity maximisation.

    Every node starts in its own community. In each iteration two
    communities are drawn at random and merged if ``_Q_merge`` reports a
    higher value for the merged assignment than the current one.

    Parameters
    ----------
    network : Network
        The network whose nodes are clustered.
    iterations : int
        Maximum number of merge attempts.

    Returns
    -------
    Tuple[Dict, float]
        The mapping of node uids to community ids and the value obtained
        from ``_Q_merge`` for that mapping.
    """
    A = network.adjacency_matrix(weighted=False)
    D = network.degrees()
    n = network.number_of_nodes()
    m = network.number_of_edges()

    # start with one community per node
    C: dict = {}
    community_to_nodes: dict = {}
    for c, v in enumerate(network.nodes.uids):
        C[v] = c
        community_to_nodes[c] = {v}

    q = _Q_merge(network, A, D, n, m, C)

    for _ in tqdm(range(iterations), desc='maximising modularity'):
        # with fewer than two communities nothing is left to merge and
        # random.sample would raise a ValueError
        if len(community_to_nodes) < 2:
            break

        # randomly choose two communities; random.sample needs a sequence
        # (dict views are rejected from Python 3.11 on)
        x, y = random.sample(list(community_to_nodes), 2)

        # check Q of merged communities
        q_new = _Q_merge(network, A, D, n, m, C, merge={x, y})

        if q_new > q:
            # merge community x into community y
            for v in community_to_nodes[x]:
                C[v] = y
            community_to_nodes[y] = community_to_nodes[y].union(
                community_to_nodes[x])
            q = q_new
            del community_to_nodes[x]

    return C, q
def distance_matrix(network: Network,
                    weight: Union[str, bool, None] = None) -> np.ndarray:
    """Calculate shortest path distances between all pairs of nodes.

    .. note::

        Shortest paths are calculated using the implementation
        of the Floyd-Warshall algorithm provided in `scipy.csgraph`.

    Parameters
    ----------
    network : Network

        The :py:class:`Network` object that contains the network

    weight : Union[str, bool, None]

        Passed on to ``network.adjacency_matrix``. If it is falsy all edges
        are treated as unweighted, otherwise cheapest paths are calculated.

    Returns
    -------
    np.ndarray
        Matrix of pairwise distances, addressed by node index.

    Examples
    --------
    Generate a path and add it to the network.

    >>> import pathpy as pp
    >>> net = pp.Network()
    >>> net.add_edges(('a', 'x'), ('x', 'y'), ('y', 'c'))
    >>> m = pp.algorithms.shortest_paths.distance_matrix(net)
    >>> m[0,3]
    3

    Add shorter path

    >>> net.add_edges(('a', 'x'), ('x', 'c'))
    >>> m = pp.algorithms.shortest_paths.distance_matrix(net)
    >>> m[0,3]
    2
    """
    adjacency = network.adjacency_matrix(weight=weight)
    ignore_weights = not weight
    return csgraph.floyd_warshall(adjacency,
                                  directed=network.directed,
                                  unweighted=ignore_weights,
                                  overwrite=False)
def calculate(self, network: Network, paths: PathCollection) -> None:
    """Calculate the null model.

    All possible paths of the model order are enumerated and cut into
    windows of length ``order - 1``. Consecutive windows become higher-order
    nodes joined by an edge whose ``observed`` and ``frequency`` attributes
    are the count of the first window among the observed sub-paths times the
    entry of the frequency-weighted transition matrix for the last edge of
    the second window (0.0 if the first window was never observed).

    Parameters
    ----------
    network : Network
        First-order network of the path data.
    paths : PathCollection
        The observed paths.
    """
    # get transition matrix of the underlying network
    transition_matrix = network.transition_matrix(weight='frequency')

    # generate all possible paths
    possible_paths = self.possible_paths(paths.edges, self.order)

    # Get all sub-paths of order-1
    subpaths = SubPathCollection.from_paths(paths,
                                            min_length=self.order - 1,
                                            max_length=self.order - 1,
                                            include_path=True)

    # add paths to the higher-order network
    for path in possible_paths:
        windows = list(self.window(path, size=self.order - 1))

        for source, target in zip(windows[:-1], windows[1:]):
            for window in (source, target):
                if window not in self.nodes:
                    self.nodes.add(window)

            endpoints = (self.nodes[source], self.nodes[target])

            # generate the expected frequencies of all possible paths
            frequency = 0.0
            if source in subpaths:
                observed = subpaths.counter[subpaths[source]]
                last_edge = target[-1]
                frequency = observed * transition_matrix[
                    network.nodes.index[last_edge.v.uid],
                    network.nodes.index[last_edge.w.uid]]

            if endpoints not in self.edges:
                self.add_edge(*endpoints, possible=0, observed=frequency,
                              frequency=frequency)
def test_from_network():
    """A second-order null model of a small star-like network."""
    net = Network()
    for source, target in (('a', 'c'), ('c', 'd'), ('b', 'c'), ('c', 'e')):
        net.add_edge(source, target, frequency=10)

    null = NullModel.from_network(net, order=2)

    assert null.number_of_edges() == 4
    assert null.number_of_nodes() == 4
    for edge in null.edges:
        assert edge['frequency'] == 5.0
def largest_connected_component(network: Network) -> Network:
    """Return the largest connected component of the network.

    The result is a copy of ``network`` from which every node outside the
    largest component reported by ``find_connected_components`` has been
    removed. The first component wins if several share the maximum size.
    """
    LOG.debug('Computing connected components')
    components = find_connected_components(network)

    # first component of maximal size, empty if there are no components
    largest = max(components.values(), key=len, default={})

    LOG.debug('Copying network')
    lcc = network.copy()

    LOG.debug('Removing nodes outside largest component')
    outside = [uid for uid in lcc.nodes.keys() if uid not in largest]
    for uid in outside:
        lcc.remove_node(uid)

    return lcc
def transition_matrix(network: Network, weight: Weight = None,
                      restart_prob: float = 0) -> sp.sparse.csr_matrix:
    """Return a transition matrix of the random walker.

    Entry ``(i, j)`` of the returned matrix is
    ``restart_prob / n + (1 - restart_prob) * A[i, j] / D[i]`` where ``A`` is
    the adjacency matrix, ``D`` its row sums and ``n`` the number of nodes.
    Rows of nodes whose row sum is not positive are set to ``1 / n`` if
    ``restart_prob > 0`` and to zero otherwise; a warning is logged if such
    nodes exist.

    Parameters
    ----------
    network: Network
        The network for which the transition matrix will be created.

    weight: Weight
        Passed on to ``adjacency_matrix``; controls whether edge weights are
        taken into account when computing transition probabilities.

    restart_prob: float
        Probability of jumping to a node chosen uniformly at random.

    Returns
    -------
    sp.sparse.csr_matrix
        The ``n x n`` transition matrix.
    """
    # local import keeps the block self-contained
    import numpy as np

    A = sp.sparse.csr_matrix(adjacency_matrix(network, weight=weight),
                             dtype=float)
    n = network.number_of_nodes()

    # row sums as a flat array instead of a column matrix
    row_sums = np.asarray(A.sum(axis=1)).ravel()
    has_out_edges = row_sums > 0

    if not has_out_edges.all():
        # reported once instead of once per matrix entry
        LOG.warning(
            'Computing transition matrix for node with zero out-degree')

    # scale every row by the inverse of its sum; rows without out-edges are
    # scaled by zero and thus stay empty
    inverse = np.zeros(n)
    inverse[has_out_edges] = 1.0 / row_sums[has_out_edges]
    probabilities = A.toarray() * inverse[:, np.newaxis]

    if restart_prob > 0:
        probabilities = restart_prob / n + (1 - restart_prob) * probabilities
        # walkers stuck in a node without out-edges restart uniformly
        probabilities[~has_out_edges, :] = 1.0 / n

    return sp.sparse.csr_matrix(probabilities)
def lattice_network(start: int = 0, stop: int = 10, dims: int = 2) -> Network:
    """Generate an n-dimensional lattice network.

    The coordinates in each dimension range from ``start`` (inclusive) to
    ``stop`` (exclusive). Every node stores its coordinates in the attribute
    ``pos`` and has the coordinates joined by ``'-'`` as uid. Two nodes are
    connected if their coordinates have Manhattan distance one.

    Parameters
    ----------
    start : int
        Smallest coordinate value (inclusive).
    stop : int
        Largest coordinate value (exclusive).
    dims : int
        Number of dimensions of the lattice.

    Returns
    -------
    Network
        The undirected lattice network.
    """
    network = Network(directed=False)

    for pos in _multi_dim_range(start, stop, dims):
        # str.join keeps the sign of negative coordinates; stripping '-'
        # from the ends would map e.g. (-1, 0) and (1, 0) to the same uid
        uid = '-'.join(str(i) for i in pos)
        network.add_node(Node(uid, pos=np.array(pos)))

    for v in network.nodes:
        for w in network.nodes:
            if (np.sum(np.abs(v['pos'] - w['pos'])) == 1
                    and (v.uid, w.uid) not in network.edges):
                network.add_edge(v, w)

    return network
def check_tree(network: Network):
    """Check whether a directed network is a (rooted) tree.

    A directed network is reported as tree if exactly one node has indegree
    zero, no node is reached twice when following successors from that
    node, and every node of the network is reached.

    Parameters
    ----------
    network : Network
        The network to check.

    Returns
    -------
    bool
        True if the network is a tree. For undirected networks an error is
        logged and False is returned.
    """
    if not network.directed:
        LOG.error('Tree checking not supported for undirected networks')
        return False

    # identify the single node with zero indegree (degrees fetched once)
    indegrees = network.indegrees()
    root = None
    for v in network.nodes.uids:
        if indegrees[v] == 0:
            if root is not None:
                # two nodes with in-degree zero -> no tree
                return False
            root = v

    if root is None:
        # no node with indegree zero -> no tree
        return False

    # iterative depth-first search; an explicit stack avoids hitting the
    # recursion limit on deep trees
    visited = {root}
    stack = [root]
    while stack:
        node = stack.pop()
        for successor in network.successors[node]:
            if successor.uid in visited:
                # node reachable on two different routes -> no tree
                return False
            visited.add(successor.uid)
            stack.append(successor.uid)

    # nodes that cannot be reached from the root (they necessarily lie in a
    # component containing a cycle) also rule out a tree
    return len(visited) == network.number_of_nodes()
def find_connected_components(network: Network) -> Dict:
    """Compute the (strongly) connected components of a network.

    Uses an iterative formulation of Tarjan's algorithm, so the depth of the
    search is not bounded by the interpreter's recursion limit.

    Parameters
    ----------
    network: Network

        Network instance

    Returns
    -------
    dict

        dictionary mapping component ids (consecutive integers starting at
        zero) to the set of node uids in that component. An empty dictionary
        is returned if the network has no nodes or no edges.
    """
    if network.number_of_nodes() == 0 or network.number_of_edges() == 0:
        return dict()

    indices: dict = {}      # discovery index per node uid
    low_link: dict = {}     # smallest index reachable from the node
    stack: list = []        # nodes of the components still under construction
    on_stack: set = set()
    components: dict = {}   # root uid -> set of member uids
    work: list = []         # explicit call stack: (uid, successor iterator)

    def open_node(uid: str) -> None:
        """Assign the next discovery index to uid and schedule its visit."""
        indices[uid] = low_link[uid] = len(indices)
        stack.append(uid)
        on_stack.add(uid)
        work.append((uid, iter(network.successors[uid])))

    # compute strongly connected components
    LOG.debug('Computing connected components')
    for start in tqdm(network.nodes.keys(), desc='component calculation'):
        if start in indices:
            continue
        open_node(start)

        while work:
            v, successors = work[-1]

            descended = False
            for node in successors:
                w = node.uid
                if w not in indices:
                    # unvisited successor: descend first, resume v later
                    open_node(w)
                    descended = True
                    break
                if w in on_stack:
                    low_link[v] = min(low_link[v], indices[w])
            if descended:
                continue

            # all successors of v are processed
            work.pop()

            # create component of node v
            if low_link[v] == indices[v]:
                members = set()
                while True:
                    w = stack.pop()
                    on_stack.discard(w)
                    members.add(w)
                    if w == v:
                        break
                components[v] = members

            # propagate the low link to the node we descended from
            if work:
                parent = work[-1][0]
                low_link[parent] = min(low_link[parent], low_link[v])

    LOG.debug('Mapping component sizes')
    return dict(enumerate(components.values()))
def all_shortest_paths(network: Network,
                       weight: Union[str, bool, None] = None,
                       return_distance_matrix: bool = True
                       ) -> Union[defaultdict, Tuple[defaultdict, np.ndarray]]:
    """Calculate shortest paths between all pairs of nodes.

    .. note::

        Shortest paths are calculated using a custom implementation of
        the Floyd-Warshall algorithm.

    Parameters
    ----------
    network : Network

        The :py:class:`Network` object that contains the network

    weight : Union[str, bool, None]

        If ``True`` the edge attribute ``'weight'`` is used as edge cost, if
        it is another value different from ``False`` and ``None`` the edge
        attribute of that name is used. Otherwise every edge has cost 1.

    return_distance_matrix : bool

        If True the matrix of distances is returned in addition to the paths.

    Returns
    -------
    Union[defaultdict, Tuple[defaultdict, np.ndarray]]

        A nested mapping ``paths[v][w]`` holding the set of shortest paths
        (tuples of node uids) from ``v`` to ``w``; if requested, followed by
        the distance matrix addressed by node index.

    Examples
    --------
    Generate a path and add it to the network.

    >>> import pathpy as pp
    >>> net = pp.Network()
    >>> net.add_edges(('a', 'x'), ('x', 'c'))
    >>> paths = pp.algorithms.shortest_paths.all_shortest_paths(net)
    >>> paths['a']['c']
    {('a', 'x', 'c')}

    Add additional path

    >>> net.add_edges(('a', 'y'), ('y', 'c'))
    >>> paths = pp.algorithms.shortest_paths.all_shortest_paths(net)
    >>> paths['a']['c']
    {('a', 'x', 'c'), ('a', 'y', 'c')}
    """
    dist: defaultdict = defaultdict(lambda: defaultdict(lambda: np.inf))
    s_p: defaultdict = defaultdict(lambda: defaultdict(set))

    # initialise distances and paths with the edges of the network
    for edge in network.edges:
        if weight == True:  # noqa: E712 (equality is intended here)
            cost = edge.attributes['weight']
        elif weight != False and weight != None:  # noqa: E711,E712
            cost = edge.attributes[weight]
        else:
            cost = 1

        src, dst = edge.v.uid, edge.w.uid
        dist[src][dst] = cost
        s_p[src][dst].add((src, dst))
        if not network.directed:
            dist[dst][src] = cost
            s_p[dst][src].add((dst, src))

    for k in tqdm(network.nodes.keys(),
                  desc='calculating shortest paths between all nodes'):
        for v in network.nodes.keys():
            for w in network.nodes.keys():
                if v == w:
                    continue

                via_k = dist[v][k] + dist[k][w]
                if dist[v][w] > via_k:
                    # we have found a shorter path: forget the old ones
                    dist[v][w] = via_k
                    s_p[v][w] = set()
                elif dist[v][w] != via_k:
                    # the detour via k is longer
                    continue

                # the detour via k is (now) a shortest path: join every
                # path v -> k with every path k -> w
                for head in list(s_p[v][k]):
                    for tail in list(s_p[k][w]):
                        s_p[v][w].add(head + tail[1:])

    # every node reaches itself with distance zero
    for v in network.nodes.keys():
        dist[v][v] = 0
        s_p[v][v].add((v,))

    if not return_distance_matrix:
        return s_p

    size = network.number_of_nodes()
    dist_arr = np.empty((size, size))
    position = network.nodes.index
    for v in network.nodes:
        for w in network.nodes:
            dist_arr[position[v.uid], position[w.uid]] = dist[v.uid][w.uid]
    return s_p, dist_arr
def from_dataframe(df: pd.DataFrame, directed: bool = True,
                   loops: bool = True, multiedges: bool = False,
                   **kwargs: Any) -> Network:
    """Read a network from a pandas dataframe.

    By default, columns `v` and `w` will be used as source and target of
    edges. If no column 'v' or 'w' exists, the list of synonyms for `v` and
    `w` in the config file will be used to remap columns, choosing the first
    matching entry. Any columns not used to create edges will be used as edge
    attributes, e.g. if a column 'v' is present and an additional column
    `source` is given, `source` will be assigned as an edge property.

    In addition, an optional column `uid` will be used to assign edge uids.
    If this column is not present, default edge uids will be created. Any
    other columns (e.g. weight, type, time, etc.) will be assigned as edge
    attributes. kwargs will be assigned as network attributes.

    The given dataframe is not modified.

    Parameters
    ----------
    df : pd.DataFrame

        The dataframe with one edge per row.

    directed: bool

        Whether to generate a directed or undirected network.

    loops: bool

        Whether rows with identical source and target are added as edges.

    multiedges: bool

        Passed on to the ``Network`` constructor.

    **kwargs: Any

        List of key-value pairs that will be assigned as network attributes

    Raises
    ------
    IOError
        If a row has no value for `v` or `w`.
    """
    # if no v/w columns are included, pick first synonym
    for target, synonyms in (('v', config['edge']['v_synonyms']),
                             ('w', config['edge']['w_synonyms'])):
        if target in df.columns:
            continue
        LOG.info('No column %s, searching for synonyms', target)
        for col in df.columns:
            if col in synonyms:
                LOG.info('Remapping column \'%s\' to \'%s\'', col, target)
                # rename on a copy: the caller's dataframe stays untouched
                df = df.rename(columns={col: target})
                # only the first match is remapped; renaming further
                # synonyms would create duplicate columns
                break

    LOG.debug('Creating %s network', directed)

    net = Network(directed=directed, multiedges=multiedges, **kwargs)
    reserved_columns = {'v', 'w', 'uid'}

    for row in df.to_dict(orient='records'):
        # get edge
        v = row.get('v', None)
        w = row.get('w', None)
        uid = row.get('uid', None)

        if v is None or w is None:
            LOG.error('DataFrame minimally needs columns \'v\' and \'w\'')
            raise IOError

        v = str(v)
        w = str(w)
        if v not in net.nodes.uids:
            net.add_node(v)
        if w not in net.nodes.uids:
            net.add_node(w)

        if uid is None:
            edge = Edge(net.nodes[v], net.nodes[w])
        else:
            edge = Edge(net.nodes[v], net.nodes[w], uid=uid)

        if loops or edge.v != edge.w:
            net.add_edge(edge)
            # all remaining columns become edge attributes
            for key, value in row.items():
                if key not in reserved_columns:
                    edge[key] = value

    return net
def read_graphml(filename: str):
    """Read a pathpy.Network from a graphml file.

    This function supports typed Node and Edge attributes including default
    values. Attribute declarations (``<key>`` elements) are looked up by
    their ``id``, which is what ``<data key="...">`` elements refer to, and
    the values are stored under the declared ``attr.name``.

    Warnings are issued if the type of Node or Edge attributes are
    undeclared, in which case the attribute type will fall back to string
    and the attribute is stored under the key of the data element.

    Parameters
    ----------
    filename: str
        The graphml file to read the graph from

    Returns
    -------
    Network
        The network described by the first ``<graph>`` element of the file.
    """
    ns = '{http://graphml.graphdrawing.org/xmlns}'

    def parse_bool(text: Any) -> bool:
        """Convert an xsd boolean; bool('false') would be True."""
        return str(text).strip().lower() in ('true', '1')

    converters = {
        'string': str,
        'float': float,
        'double': float,
        'int': int,
        'long': int,
        'boolean': parse_bool,
    }

    root = ET.parse(filename).getroot()
    graph = root.find(ns + 'graph')
    directed = graph.attrib['edgedefault'] != 'undirected'

    # the id of the graph element is optional in GraphML
    graph_uid = graph.attrib.get('id')
    if graph_uid is None:
        n = Network(directed=directed)
    else:
        n = Network(directed=directed, uid=graph_uid)

    # attribute declarations, indexed by the id of the <key> element
    node_attributes: dict = {}
    edge_attributes: dict = {}

    # read attribute types and default values
    for key in root.findall(ns + 'key'):
        a_id = key.attrib['id']
        a_for = key.attrib.get('for', 'all')
        a_type = key.attrib.get('attr.type', 'string')

        # unknown types fall back to string
        a_data = {'name': key.attrib.get('attr.name', a_id),
                  'type': converters.get(a_type, str)}

        default = key.find(ns + 'default')
        if default is not None:
            a_data['default'] = a_data['type'](default.text)

        # declarations for 'all' apply to nodes and edges alike
        if a_for in ('node', 'all'):
            node_attributes[a_id] = a_data
        if a_for in ('edge', 'all'):
            edge_attributes[a_id] = a_data

    def assign_attributes(element: Any, target: Any, declared: dict,
                          undeclared_message: str) -> None:
        """Copy the <data> values of element into target.attributes."""
        # set attribute values
        for data in element.findall(ns + 'data'):
            data_key = data.attrib['key']
            val = data.text
            if data_key not in declared:
                LOG.warning(undeclared_message.format(data_key))
                target.attributes[data_key] = val
            else:
                info = declared[data_key]
                target.attributes[info['name']] = info['type'](val)

        # set default values
        for info in declared.values():
            if ('default' in info
                    and target.attributes[info['name']] is None):
                target.attributes[info['name']] = info['default']

    # add nodes with uids and attributes
    for node in graph.findall(ns + 'node'):
        v = Node(uid=node.attrib['id'])
        assign_attributes(
            node, v, node_attributes,
            'Undeclared Node attribute "{}". Defaulting to string type.')
        n.add_node(v)

    # add edges with uids and attributes
    for edge in graph.findall(ns + 'edge'):
        source = edge.attrib['source']
        target = edge.attrib['target']
        # edge ids are optional in GraphML; None requests a default uid
        e = Edge(n.nodes[source], n.nodes[target],
                 uid=edge.attrib.get('id'))
        assign_attributes(
            edge, e, edge_attributes,
            'Warning: Undeclared Edge attribute "{}". '
            'Defaulting to string type.')
        n.add_edge(e)

    return n
def is_connected(network: Network) -> bool:
    """Return whether the network is (strongly) connected.

    The network counts as connected if its largest component, as reported by
    ``largest_component_size``, contains all nodes.
    """
    largest = largest_component_size(network)
    return largest == network.number_of_nodes()
def ER_np(n: int, p: float, directed: bool = False, loops: bool = False,
          node_uids: Optional[list] = None) -> Network:
    """(n, p) Erdös-Renyi model

    Generates a random graph with a fixed number of n nodes and edge
    probability p based on the Erdös-Renyi model.

    Parameters
    ----------
    n : int

        The number of nodes in the generated network

    p : float

        The probability with which an edge will be created
        between each pair of nodes

    directed : bool

        Whether a directed network should be generated

    loops : bool

        Whether or not the generated network may contain
        loops.

    node_uids : list

        Optional list of node uids that will be used. If it is missing or
        does not contain exactly n entries, the uids '0' ... 'n-1' are
        generated.

    Examples
    --------
    Generate random undirected network with 10 nodes

    >>> import pathpy as pp
    >>> random_graph = pp.algorithms.random_graphs.ER_np(n=10, p=0.03)
    >>> print(random_graph.summary())
    ...
    """
    network = Network(directed=directed)

    if node_uids is None or len(node_uids) != n:
        LOG.info('No valid node uids given, generating numeric node uids')
        node_uids = [str(i) for i in range(n)]

    for uid in node_uids:
        network.add_node(uid)

    for s in tqdm(range(n), 'generating G(n,p) network'):
        # undirected networks only need each unordered pair once
        targets = range(n) if directed else range(s + 1)
        for t in targets:
            if not loops and t == s:
                continue
            if np.random.random_sample() < p:
                network.add_edge(node_uids[s], node_uids[t])

    return network
def Watts_Strogatz(n: int, s: int, p: float = 0.0, loops: bool = False,
                   node_uids: Optional[list] = None) -> Network:
    """Undirected Watts-Strogatz lattice network

    Generates an undirected Watts-Strogatz lattice network with lattice
    dimensionality one.

    Parameters
    ----------
    n : int

        The number of nodes in the generated network

    s : int

        The number of nearest neighbors that will be connected
        in the ring lattice

    p : float

        The rewiring probability

    loops : bool

        Whether the generated network may contain loops.

    node_uids : list

        Optional list of node uids that will be used. If it is missing or
        does not contain exactly n entries, the uids '0' ... 'n-1' are
        generated.

    Examples
    --------
    Generate a Watts-Strogatz network with 100 nodes

    >>> import pathpy as pp
    >>> small_world = pp.algorithms.random_graphs.Watts_Strogatz(n=100, s=2, p=0.1)
    >>> print(small_world.summary())
    ...
    """
    network = Network(directed=False)

    if node_uids is None or len(node_uids) != n:
        LOG.info('No valid node uids given, generating numeric node uids')
        node_uids = [str(i) for i in range(n)]
        for uid in node_uids:
            network.add_node(Node(uid))
    else:
        for uid in node_uids:
            network.add_node(uid)

    # construct a ring lattice (dimension 1); offset 0 creates the loops
    offsets = range(0, s) if loops else range(1, s + 1)
    for i in range(n):
        for j in offsets:
            v = network.nodes[node_uids[i]]
            w = network.nodes[node_uids[(i + j) % n]]
            if (v.uid, w.uid) not in network.edges:
                network.add_edge(v, w)

    if p == 0:
        # nothing to do here
        return network

    # Rewire each link with probability p
    for edge in tqdm(list(network.edges.values()), 'generating WS network'):
        if np.random.rand() < p:
            # Delete original link and remember source node
            v = edge.v.uid
            network.remove_edge(edge)

            # Find new random tgt, which is not yet connected to src.
            # This loop repeatedly chooses a random target until we find
            # a target not yet connected to src. Note that this could
            # potentially result in an infinite loop depending on parameters.
            new_target = None
            while new_target is None:
                # draw from the actual node uids; str(index) is only a
                # valid uid when the numeric default uids are in use
                x = node_uids[np.random.randint(n)]
                if (x != v or loops) and (v, x) not in network.edges:
                    new_target = x
            network.add_edge(v, new_target)

    return network
def ER_nm(n: int, m: int, directed: bool = False, loops: bool = False,
          multiedges: bool = False,
          node_uids: Optional[list] = None) -> Union[Network, None]:
    """(n, m) Erdös-Renyi model.

    Generates a random graph with a fixed number of n nodes and m edges
    based on the Erdös-Renyi model.

    Parameters
    ----------
    n : int

        The number of nodes in the generated network

    m : int

        The number of randomly generated edges in the network

    directed : bool

        Whether a directed network should be generated

    loops : bool

        Whether or not the generated network may contain
        loops.

    multiedges : bool

        Whether or not the same edge can be added multiple times

    node_uids : list

        Optional list of node uids that will be used. If it is missing or
        does not contain exactly n entries, the uids '0' ... 'n-1' are
        generated.

    Returns
    -------
    Union[Network, None]

        The generated network, or None (after logging an error) if m
        exceeds the number of edges reported by ``max_edges``.

    Examples
    --------
    Generate random undirected network with 10 nodes and 25 edges

    >>> import pathpy as pp
    >>> random_graph = pp.algorithms.random_graphs.ER_nm(n=10, m=25)
    >>> print(random_graph.summary())
    ...
    """
    # Check parameter sanity
    M = max_edges(n, directed=directed, loops=loops, multiedges=multiedges)
    if m > M:
        LOG.error('Given network type with n nodes can have at most {} edges.'.
                  format(M))
        return None

    # the network itself has to permit multi-edges if they are requested
    network = Network(directed=directed, multiedges=multiedges)

    if node_uids is None or len(node_uids) != n:
        LOG.info('No valid node uids given, generating numeric node uids')
        node_uids = [str(i) for i in range(n)]

    for uid in node_uids:
        network.add_node(uid)

    edges = 0
    while edges < m:
        # draw positions rather than uids so that the uids are used exactly
        # as given and are not converted to numpy scalars
        i, j = np.random.choice(n, size=2, replace=loops)
        v, w = node_uids[i], node_uids[j]
        if multiedges or network.nodes[w] not in network.successors[v]:
            network.add_edge(v, w)
            edges += 1

    return network
def Molloy_Reed(degrees: Union[np.array, Dict[str, float]],
                multiedge: bool = False, relax: bool = False,
                node_uids: Optional[list] = None) -> Network:
    """Generate Molloy-Reed graph.

    Generates a random undirected network with given degree sequence based
    on the Molloy-Reed algorithm.

    .. note::

        The condition proposed by Erdös and Gallai (1967) is used to test
        whether the degree sequence is graphic, i.e. whether a network with
        the given degree sequence exists.

    Parameters
    ----------
    degrees : list

        List of integer node degrees. The number of nodes of the generated
        network corresponds to len(degrees).

    multiedge : bool

        If True, several edges may be added between the same pair of nodes.

    relax : bool

        If True, we conceptually allow multi-edges, but do not add them to
        the network. This implies that the generated network may not have
        exactly sum(degrees)/2 links.

    node_uids : list

        Optional list of node uids that will be used. If it is missing or
        does not contain exactly len(degrees) entries, numeric uids are
        generated.

    Returns
    -------
    Network

        The generated network, or None if ``is_graphic_Erdos_Gallai``
        rejects the degree sequence.

    Examples
    --------
    Generate random undirected network with given degree sequence

    >>> import pathpy as pp
    >>> random_network = pp.algorithms.random_graphs.Molloy_Reed([1,1])
    >>> print(random_network.summary())
    ...

    Network generation fails for non-graphic sequences

    >>> import pathpy as pp
    >>> random_network = pp.algorithms.random_graphs.Molloy_Reed([1,0])
    >>> print(random_network)
    None
    """
    # only graphic degree sequences can be realised
    if not is_graphic_Erdos_Gallai(degrees):
        return None

    # create empty network with n nodes
    n = len(degrees)
    network = Network(directed=False, multiedges=multiedge)

    if node_uids is None or len(node_uids) != n:
        LOG.info('No valid node uids given, generating numeric node uids')
        node_uids = [str(i) for i in range(n)]

    for uid in node_uids:
        network.add_node(uid)

    # generate link stubs based on degree sequence
    stubs: list = []
    for i in range(n):
        stubs.extend([str(node_uids[i])] * int(degrees[i]))

    # connect randomly chosen pairs of link stubs
    while stubs:
        # draw two different stub positions (the nodes may still coincide)
        i, j = np.random.choice(len(stubs), 2, replace=False)
        v, w = stubs[i], stubs[j]
        connected = network.nodes[w] in network.successors[v]

        if v == w or (not multiedge and not relax and connected):
            # dead end: remove random edge and add its stubs again
            if network.number_of_edges() > 0:
                # index into the list; handing edge objects to
                # np.random.choice would make numpy convert them to an array
                existing = list(network.edges)
                edge = existing[np.random.randint(len(existing))]
                stubs.append(edge.v.uid)
                stubs.append(edge.w.uid)
                network.remove_edge(edge)
        else:
            # parallel edges are only added if multi-edges are requested;
            # in relaxed mode the stubs are consumed without adding one
            if multiedge or not connected:
                network.add_edge(v, w)
            stubs.remove(v)
            stubs.remove(w)

    return network