def read_pathcollection(filename: str, separator: str = ',',
                        frequency: bool = False, directed: bool = True,
                        maxlines: int = None) -> PathCollection:
    """Read a collection of paths from a delimited text file.

    Every line of the file is split on ``separator`` and interpreted as one
    path: the fields are node identifiers in visiting order, and an edge is
    created for every pair of consecutive nodes (``a,b,c`` gives the edges
    ``(a, b)`` and ``(b, c)``). A line holding a single node yields a path
    that is built from that node alone.

    Nodes and edges are created once and shared between all paths. If the
    same node sequence occurs on several lines, only the first occurrence
    is kept; the frequencies of later duplicates are ignored.

    Parameters
    ----------
    filename : str
        path to the input file

    separator : str
        character separating the fields of a line

    frequency : bool
        if ``True`` the last field of every line is parsed as the (float)
        frequency of the path (i.e. ``a,b,2``) instead of as a node;
        otherwise every path gets the frequency ``1.0``

    directed : bool
        are the edges directed or undirected; passed on to the resulting
        ``PathCollection``

    maxlines : int
        maximum number of lines to read (useful to test large files).
        ``None`` means the entire file is read

    Returns
    -------
    PathCollection
        collection holding the nodes, edges and paths found in the file

    Raises
    ------
    ValueError
        if ``frequency`` is ``True`` and the last field of a line cannot be
        converted to ``float``

    """
    from pathpy.core.path import Path, PathCollection

    nodes: dict = {}
    edges: dict = {}
    paths: dict = {}

    with open(filename, 'r') as handle:
        for n, line in enumerate(handle):
            # ``n`` equals the number of lines processed so far; checking
            # before the line is handled reads exactly ``maxlines`` lines.
            if maxlines is not None and n >= maxlines:
                break

            fields = line.rstrip().split(separator)
            # NOTE: ``str.split`` with an explicit separator always returns
            # at least one field, so this check cannot fail; it is kept so
            # that the function's error behaviour stays unchanged.
            assert len(fields) >= 1, 'Error: empty line: {0}'.format(line)

            if frequency:
                path = tuple(fields[:-1])
                freq = float(fields[-1])
            else:
                path = tuple(fields)
                freq = 1.0

            # Create each node only once so all paths share the same objects.
            for node in path:
                if node not in nodes:
                    nodes[node] = Node(node)

            if len(path) == 1:
                # A single-node path has no edges and is built from the node.
                if path not in paths:
                    paths[path] = Path(nodes[path[0]], frequency=freq)
            else:
                edge_list = []
                for u, v in zip(path[:-1], path[1:]):
                    if (u, v) not in edges:
                        edges[(u, v)] = Edge(nodes[u], nodes[v])
                    edge_list.append(edges[(u, v)])

                # Only the first occurrence of a node sequence is stored.
                if path not in paths:
                    paths[path] = Path(*edge_list, frequency=freq)

    ncoll = NodeCollection()
    for node in nodes.values():
        ncoll.add(node)

    ecoll = EdgeCollection(nodes=ncoll)
    for edge in edges.values():
        ecoll._add(edge)

    _paths = PathCollection(directed=directed, nodes=ncoll, edges=ecoll)
    for _path in paths.values():
        _paths._add(_path)

    return _paths
def read_file(cls, filename: str, separator: str = ',',
              frequency: bool = False, directed: bool = True,
              maxlines: int = None) -> 'PathCollection':
    """Read a collection of paths from a delimited text file.

    Every line of the file is split on ``separator`` and interpreted as one
    path: the fields are node identifiers in visiting order, and an edge
    with the uid ``"<u>-<v>"`` is created for every pair of consecutive
    nodes ``u``, ``v``. Every line must contain at least two fields.

    Nodes and edges are created once and shared between all paths. If the
    same node sequence occurs on several lines, only the first occurrence
    is kept; the frequencies of later duplicates are ignored.

    Parameters
    ----------
    filename : str
        path to the input file

    separator : str
        character separating the fields of a line

    frequency : bool
        if ``True`` the last field of every line is parsed as the (integer)
        frequency of the path (i.e. ``a,b,2``) instead of as a node;
        otherwise every path gets the frequency ``1``

    directed : bool
        are the edges directed or undirected; passed on to the resulting
        ``PathCollection``

    maxlines : int
        maximum number of lines to read (useful to test large files).
        ``None`` means the entire file is read

    Returns
    -------
    PathCollection
        collection holding the nodes, edges and paths found in the file

    Raises
    ------
    AssertionError
        if a line contains fewer than two fields
    ValueError
        if ``frequency`` is ``True`` and the last field of a line cannot be
        converted to ``int``

    """
    nodes = {}
    edges = {}
    paths = {}

    with open(filename, 'r') as handle:
        for n, line in enumerate(handle):
            # ``n`` equals the number of lines processed so far; checking
            # before the line is handled reads exactly ``maxlines`` lines.
            if maxlines is not None and n >= maxlines:
                break

            fields = line.rstrip().split(separator)
            assert len(fields) >= 2, 'Error: malformed line: {0}'.format(
                line)

            # The frequency gets its own name; it must not rebind the name
            # of the open file handle.
            if frequency:
                path = tuple(fields[:-1])
                freq = int(fields[-1])
            else:
                path = tuple(fields)
                freq = 1

            # Create each node only once so all paths share the same objects.
            for node in path:
                if node not in nodes:
                    nodes[node] = Node(node)

            edge_list = []
            for u, v in zip(path[:-1], path[1:]):
                if (u, v) not in edges:
                    edges[(u, v)] = Edge(nodes[u], nodes[v], uid=u + '-' + v)
                edge_list.append(edges[(u, v)])

            # Only the first occurrence of a node sequence is stored.
            if path not in paths:
                paths[path] = Path(*edge_list, frequency=freq)

    nc = NodeCollection()
    nc.add(*nodes.values())

    ec = EdgeCollection(nodes=nc)
    for edge in edges.values():
        ec._add(edge)

    # Forward ``directed`` so the documented parameter actually takes effect.
    collection = PathCollection(directed=directed, nodes=nc, edges=ec)
    for stored_path in paths.values():
        collection._add(stored_path)

    return collection