def _(self, data: PathCollection, order: Optional[int] = None) -> None:
    """Fit the null model to a collection of paths.

    Parameters
    ----------
    data : PathCollection
        Observed paths the model is fitted to.
    order : Optional[int]
        If given, replaces the order stored on the model before fitting.

    Raises
    ------
    AttributeError
        If the resulting order is neither in ``[0, 1]`` nor greater than 1.
    """
    # An explicitly passed order overrides the stored one.
    if order is not None:
        self._order = order

    # Orders 0 and 1 are delegated to the parent implementation.
    if 0 <= self.order <= 1:
        super().fit(data, order=self.order)
        return

    # Everything that is not a higher order (> 1) is rejected.
    if not self.order > 1:
        LOG.error('A Null Model with order %s is not supported', self.order)
        raise AttributeError

    # Copy the base nodes and edges of the data into fresh collections and
    # build the higher-order node collection on top of them.
    base_nodes = NodeCollection()
    for base_node in data.nodes.values():
        base_nodes.add(base_node)

    base_edges = EdgeCollection(nodes=base_nodes)
    for base_edge in data.edges.values():
        base_edges.add(base_edge)

    self._nodes = HigherOrderNodeCollection(nodes=base_nodes,
                                            edges=base_edges)

    # The paths are used as given; the network is derived from them.
    paths = data
    network = Network.from_paths(paths, frequencies=True)

    self.calculate(network, paths)
def test_NodeCollection_iter():
    """Test iterating through the node collection."""
    collection = NodeCollection()
    collection.add(['a', 'b', 'c', 'd'])

    # Plain iteration yields node objects with string uids.
    for member in collection:
        assert isinstance(member.uid, str)

    # items() yields (uid, node) pairs.
    for key, member in collection.items():
        assert isinstance(key, str)
        assert isinstance(member, Node)

    # keys() yields the uids only.
    for key in collection.keys():
        assert isinstance(key, str)

    # values() yields the node objects only.
    for member in collection.values():
        assert isinstance(member, Node)
def test_NodeCollection():
    """Test adding, looking up and removing nodes of a NodeCollection."""
    collection = NodeCollection()
    assert len(collection) == 0

    # A single node is reachable by uid and by object.
    node_a = Node('a')
    collection.add(node_a)
    assert len(collection) == 1
    assert collection['a'] == node_a
    assert collection[node_a] == node_a
    assert 'a' in collection
    assert node_a in collection
    assert 'a' in collection.uids
    assert 'a' in collection.keys()
    assert node_a in collection.values()
    assert ('a', node_a) in collection.items()

    # A tuple of uids adds one node per uid.
    collection.add(('b', 'c'))
    assert len(collection) == 3

    # NOTE(review): the original contained disabled checks expecting
    # collection.add('a') and collection.add(node_a) to raise inside
    # pytest.raises(Exception); confirm whether re-adding an existing node
    # should raise before re-enabling them.

    # Attributes are shared between the node object and the stored entry.
    node_d = Node('d', color='blue')
    collection.add(node_d)
    assert collection['d']['color'] == 'blue'
    node_d['color'] = 'red'
    assert collection['d']['color'] == 'red'

    # Nested lists/tuples of uids are accepted as well.
    collection.add(['e', ('f', 'g'), ['h', 'i']])
    assert len(collection) == 9

    # Removal by object ...
    collection.remove(node_a)
    assert len(collection) == 8
    assert node_a not in collection

    # ... by uid ...
    collection.remove('b')
    assert len(collection) == 7
    assert 'b' not in collection

    # ... and by nested containers of uids.
    collection.remove([('e', 'f', 'g'), 'h', ['i']])
    assert len(collection) == 2
def _(self, data: Network, order: Optional[int] = None) -> None:
    """Fit the null model to a network.

    Parameters
    ----------
    data : Network
        Network the model is fitted to.
    order : Optional[int]
        If given, replaces the order stored on the model before fitting.

    Raises
    ------
    AttributeError
        If the resulting order is neither in ``[0, 1]`` nor greater than 1.
    """
    # An explicitly passed order overrides the stored one.
    if order is not None:
        self._order = order

    # Orders 0 and 1 are delegated to the parent implementation.
    if 0 <= self.order <= 1:
        super().fit(data, order=self.order)
        return

    # Everything that is not a higher order (> 1) is rejected.
    if not self.order > 1:
        LOG.error('A Null Model with order %s is not supported', self.order)
        raise AttributeError

    # TODO: create function to transfer base data from PathCollection object
    # Copy the base nodes and edges of the data into fresh collections and
    # build the higher-order node collection on top of them.
    base_nodes = NodeCollection()
    for base_node in data.nodes.values():
        base_nodes.add(base_node)

    base_edges = EdgeCollection(nodes=base_nodes)
    for base_edge in data.edges.values():
        base_edges.add(base_edge)

    self._nodes = HigherOrderNodeCollection(nodes=base_nodes,
                                            edges=base_edges)

    # The network is used as given; a path representation is derived from
    # it with one path per edge, weighted by the edge's 'frequency'
    # attribute (1 when the attribute is missing).
    network = data
    paths = PathCollection(directed=network.directed,
                           nodes=network.nodes,
                           edges=network.edges)
    for link in data.edges:
        weight = link.attributes.get('frequency', 1)
        paths.add(link, frequency=weight)

    self.calculate(network, paths)
def _(self, data: PathCollection, order: Optional[int] = None,
      subpaths: bool = True) -> None:
    """Fit the higher-order network to a collection of paths.

    Parameters
    ----------
    data : PathCollection
        Observed paths. Each path's ``'frequency'`` attribute is used as
        its weight (1 when the attribute is missing).
    order : Optional[int]
        If given, replaces the order stored on the model before fitting.
    subpaths : bool
        If ``True``, ``self._subpaths`` is populated via
        ``SubPathCollection.from_paths`` with ``max_length`` set to the order.
    """
    if order is not None:
        self._order = order

    order = self.order

    # TODO: create function to transfer base data from PathCollection object
    # Copy the base nodes and edges of the data into fresh collections and
    # build the higher-order node collection on top of them.
    nc = NodeCollection()
    for node in data.nodes.values():
        nc.add(node)

    ec = EdgeCollection(nodes=nc)
    for edge in data.edges.values():
        ec.add(edge)

    self._nodes = HigherOrderNodeCollection(nodes=nc, edges=ec)

    for path in data:
        # TODO: define keyword in config file
        frequency = path.attributes.get('frequency', 1)

        # Higher-order nodes visited by this path, in order; consecutive
        # entries are connected by higher-order edges below.
        ho_nodes: list = []

        if order == 0:
            # Order 0 only counts node occurrences; no edges are created.
            for node in path.nodes:
                if (node, ) not in self.nodes:
                    self.add_node(node, frequency=0.0)

            for node in path.nodes:
                self.nodes[(node, )]['frequency'] += frequency

        elif order == 1:
            ho_nodes = [(n, ) for n in path.nodes]

        elif 1 < order <= len(path):
            ho_nodes.extend(self.window(path.edges, size=order - 1))

        elif order == len(path) + 1:
            # The path yields exactly one higher-order node and no edge.
            if tuple(path.edges) not in self.nodes:
                self.nodes.add(tuple(path.edges))

        # Create missing higher-order nodes/edges along the path and collect
        # the traversed edges.
        traversed = []
        for _v, _w in zip(ho_nodes[:-1], ho_nodes[1:]):
            if _v not in self.nodes:
                self.nodes.add(_v)

            if _w not in self.nodes:
                self.nodes.add(_w)

            endpoints = (self.nodes[_v], self.nodes[_w])
            if endpoints not in self.edges:
                self.add_edge(*endpoints, possible=0, observed=0, frequency=0)

            traversed.append(self.edges[endpoints])

        # A path whose length equals the order counts as 'observed';
        # every other contribution counts as 'possible'.
        for edge in traversed:
            edge['frequency'] += frequency
            if order == len(path):
                edge['observed'] += frequency
            else:
                edge['possible'] += frequency

    if order == 0:
        # Normalise the node counts. The total is computed once instead of
        # once per node, which made the original loop quadratic.
        total = sum(n['frequency'] for n in self.nodes)
        for node in self.nodes:
            node['frequency'] = node['frequency'] / total

    if subpaths:
        self._subpaths = SubPathCollection.from_paths(
            data, max_length=order, include_path=True)
def read_pathcollection(filename: str, separator: str = ',',
                        frequency: bool = False, directed: bool = True,
                        maxlines: int = None) -> PathCollection:
    """Read a collection of paths from a text file.

    Every non-blank line describes one path as a sequence of node uids
    joined by `separator` (e.g. ``a,b,c``). A line with a single node
    yields a path consisting of that node only. A path that appears on
    several lines is stored once, with the frequency of its first
    occurrence.

    Parameters
    ----------
    filename : str
        path to the file to read

    separator : str
        character separating the fields of a line

    frequency : bool
        if ``True`` the last field of each line is parsed as the (float)
        frequency of the path (i.e. ``a,b,2``); otherwise every path gets
        frequency 1.0

    directed : bool
        passed on to the resulting ``PathCollection``

    maxlines : int
        number of lines to read (useful to test large files). None means
        the entire file is read

    Returns
    -------
    PathCollection
        collection holding all nodes, edges and paths found in the file
    """
    from pathpy.core.path import Path, PathCollection

    nodes: dict = {}
    edges: dict = {}
    paths: dict = {}

    with open(filename, 'r') as handle:
        for n, line in enumerate(handle):
            # Check the limit before processing so that exactly `maxlines`
            # lines are read (checking afterwards read one line too many).
            if maxlines is not None and n >= maxlines:
                break

            # Skip blank lines; they would otherwise create a node with
            # an empty uid.
            stripped = line.rstrip()
            if not stripped:
                continue
            fields = stripped.split(separator)

            if frequency:
                path = tuple(fields[:-1])
                freq = float(fields[-1])
            else:
                path = tuple(fields)
                freq = 1.0

            for node in path:
                if node not in nodes:
                    nodes[node] = Node(node)

            if len(path) == 1 and path not in paths:
                paths[path] = Path(nodes[path[0]], frequency=freq)
            else:
                edge_list = []
                for u, v in zip(path[:-1], path[1:]):
                    if (u, v) not in edges:
                        edges[(u, v)] = Edge(nodes[u], nodes[v])
                    edge_list.append(edges[(u, v)])

                if path not in paths:
                    paths[path] = Path(*edge_list, frequency=freq)

    ncoll = NodeCollection()
    for node in nodes.values():
        ncoll.add(node)

    ecoll = EdgeCollection(nodes=ncoll)
    for edge in edges.values():
        ecoll._add(edge)

    _paths = PathCollection(directed=directed, nodes=ncoll, edges=ecoll)
    for _path in paths.values():
        _paths._add(_path)

    return _paths
def read_file(cls, filename: str, separator: str = ',',
              frequency: bool = False, directed: bool = True,
              maxlines: int = None) -> 'PathCollection':
    """Read a collection of paths from a text file.

    Every line describes one path as a sequence of node uids joined by
    `separator` (e.g. ``a,b,c``). Edges between consecutive nodes get the
    uid ``"<u>-<v>"``. A path that appears on several lines is stored once,
    with the frequency of its first occurrence.

    Parameters
    ----------
    filename : str
        path to the file to read

    separator : str
        character separating the fields of a line

    frequency : bool
        if ``True`` the last field of each line is parsed as the (integer)
        frequency of the path (i.e. ``a,b,2``); otherwise every path gets
        frequency 1

    directed : bool
        passed on to the resulting ``PathCollection``

    maxlines : int
        number of lines to read (useful to test large files). None means
        the entire file is read

    Returns
    -------
    PathCollection
        collection holding all nodes, edges and paths found in the file

    Raises
    ------
    AssertionError
        if a line has fewer than two fields
    """
    nodes = {}
    edges = {}
    paths = {}

    with open(filename, 'r') as handle:
        for n, line in enumerate(handle):
            # Check the limit before processing so that exactly `maxlines`
            # lines are read (checking afterwards read one line too many).
            if maxlines is not None and n >= maxlines:
                break

            fields = line.rstrip().split(separator)
            # NOTE(review): this validation disappears under ``python -O``;
            # kept as an assert so callers see the same exception type.
            assert len(fields) >= 2, 'Error: malformed line: {0}'.format(
                line)

            # The frequency gets its own name; the original rebound the
            # file-handle variable here.
            if frequency:
                path = tuple(fields[:-1])
                freq = int(fields[-1])
            else:
                path = tuple(fields)
                freq = 1

            for node in path:
                if node not in nodes:
                    nodes[node] = Node(node)

            edge_list = []
            for u, v in zip(path[:-1], path[1:]):
                if (u, v) not in edges:
                    edges[(u, v)] = Edge(nodes[u], nodes[v], uid=u + '-' + v)
                edge_list.append(edges[(u, v)])

            if path not in paths:
                paths[path] = Path(*edge_list, frequency=freq)

    nc = NodeCollection()
    nc.add(*nodes.values())

    ec = EdgeCollection(nodes=nc)
    for edge in edges.values():
        ec._add(edge)

    # Forward `directed`; it was accepted and documented but never used.
    p = PathCollection(directed=directed, nodes=nc, edges=ec)
    for path in paths.values():
        p._add(path)

    return p
def test_NodeCollection():
    """Test adding, looking up and removing nodes of a NodeCollection."""
    collection = NodeCollection()
    assert len(collection) == 0

    # A single node is reachable by uid and by object.
    node_a = Node('a')
    collection.add(node_a)
    assert len(collection) == 1
    assert collection['a'] == node_a
    assert collection[node_a] == node_a
    assert 'a' in collection
    assert node_a in collection
    assert 'a' in collection.uids
    assert 'a' in collection.keys()
    assert node_a in collection.values()
    assert ('a', node_a) in collection.items()
    assert {'a': node_a} == collection.dict

    # Several uids can be passed as separate arguments.
    collection.add('b', 'c')
    print(collection)
    assert len(collection) == 3

    # Adding an existing node again must fail, by uid and by object.
    with pytest.raises(Exception):
        collection.add('a')
    with pytest.raises(Exception):
        collection.add(node_a)

    # Attributes are shared between the node object and the stored entry.
    node_d = Node('d', color='blue')
    collection.add(node_d)
    assert collection['d']['color'] == 'blue'
    node_d['color'] = 'red'
    assert collection['d']['color'] == 'red'

    # Mixed positional arguments: uid, tuple of uids, list of uids.
    collection.add('e', ('f', 'g'), ['h', 'i'])
    assert len(collection) == 9

    # Removal accepts the same mixed argument forms.
    collection.remove(('e', 'f', 'g'), 'h', ['i'])
    assert len(collection) == 4