def _(self, data: PathCollection, order: Optional[int] = None) -> None:
    """Fit the null model to a collection of paths.

    Parameters
    ----------
    data : PathCollection
        Observed paths the model is fitted to.
    order : Optional[int]
        Order to fit. When given, it overwrites the stored order.

    Raises
    ------
    AttributeError
        If the resulting order is neither within [0, 1] nor greater
        than one.
    """
    # an explicitly requested order replaces the stored one
    if order is not None:
        self._order = order

    # orders zero and one are handled by the parent implementation
    if 0 <= self.order <= 1:
        super().fit(data, order=self.order)
        return

    # anything that is not above one is rejected at this point
    if not self.order > 1:
        LOG.error('A Null Model with order %s is not supported', self.order)
        raise AttributeError

    # copy the base nodes and edges of the data into fresh collections
    base_nodes = NodeCollection()
    for vertex in data.nodes.values():
        base_nodes.add(vertex)

    base_edges = EdgeCollection(nodes=base_nodes)
    for link in data.edges.values():
        base_edges.add(link)

    self._nodes = HigherOrderNodeCollection(nodes=base_nodes,
                                            edges=base_edges)

    # derive a network from the paths and run the calculation on both
    first_order = Network.from_paths(data, frequencies=True)
    self.calculate(first_order, data)
def test_EdgeCollection_undirected():
    """Test undirected edge collection"""
    edges = EdgeCollection(directed=False)

    # adding the reversed node pair is expected not to create a second edge
    edges.add('a', 'b')
    edges.add('b', 'a')
    assert len(edges) == 1

    # identity check instead of `== False` (PEP 8)
    assert edges['a', 'b'].directed is False

    # the edge must be found in both orientations
    assert ('a', 'b') in edges
    assert ('b', 'a') in edges
def _(self, data: Network, order: Optional[int] = None) -> None:
    """Fit the null model to a network.

    Parameters
    ----------
    data : Network
        Network the model is fitted to.
    order : Optional[int]
        Order to fit. When given, it overwrites the stored order.

    Raises
    ------
    AttributeError
        If the resulting order is neither within [0, 1] nor greater
        than one.
    """
    # an explicitly requested order replaces the stored one
    if order is not None:
        self._order = order

    # orders zero and one are handled by the parent implementation
    if 0 <= self.order <= 1:
        super().fit(data, order=self.order)
        return

    # anything that is not above one is rejected at this point
    if not self.order > 1:
        LOG.error('A Null Model with order %s is not supported', self.order)
        raise AttributeError

    # TODO: create function to transfer base data from PathCollection object
    base_nodes = NodeCollection()
    for vertex in data.nodes.values():
        base_nodes.add(vertex)

    base_edges = EdgeCollection(nodes=base_nodes)
    for link in data.edges.values():
        base_edges.add(link)

    self._nodes = HigherOrderNodeCollection(nodes=base_nodes,
                                            edges=base_edges)

    # path representation of the network: each edge is added to the
    # path collection with its 'frequency' attribute (1 if missing)
    paths = PathCollection(directed=data.directed,
                           nodes=data.nodes,
                           edges=data.edges)
    for link in data.edges:
        paths.add(link, frequency=link.attributes.get('frequency', 1))

    self.calculate(data, paths)
def __init__(self,
             directed: bool = True,
             multiedges: bool = False,
             multipaths: bool = False,
             nodes: Optional[NodeCollection] = None,
             edges: Optional[EdgeCollection] = None) -> None:
    """Initialize the object with its node/edge collections and path maps.

    Parameters
    ----------
    directed : bool
        Stored as the directed/undirected indicator.
    multiedges : bool
        Stored as the multi-edge indicator.
    multipaths : bool
        Stored as the multi-path indicator.
    nodes : Optional[NodeCollection]
        Node collection to use. If omitted, the nodes of ``edges`` are
        used when ``edges`` is given, otherwise a new empty collection.
    edges : Optional[EdgeCollection]
        Edge collection to use. If omitted, a new collection bound to
        the chosen node collection is used.
    """
    # pylint: disable=too-many-arguments
    super().__init__()

    # configuration flags
    self._directed: bool = directed
    self._multiedges: bool = multiedges
    self._multipaths: bool = multipaths

    # node collection: explicit argument > nodes of given edges > empty
    fallback_nodes = NodeCollection()
    if nodes is not None:
        node_store = nodes
    elif edges is not None:
        node_store = edges.nodes
    else:
        node_store = fallback_nodes
    self._nodes: NodeCollection = node_store

    # edge collection: explicit argument > new collection on node_store
    fallback_edges = EdgeCollection(directed=directed,
                                    multiedges=multiedges,
                                    nodes=node_store)
    self._edges: EdgeCollection = fallback_edges if edges is None else edges

    # lookup tables (populated elsewhere)
    # node tuples -> paths
    self._nodes_map: defaultdict = defaultdict(PathSet)
    # single node -> paths
    self._node_map: defaultdict = defaultdict(set)
    # edge tuples -> paths
    self._edges_map: defaultdict = defaultdict(PathSet)
    # presumably single edge -> paths (original comment said "node")
    self._edge_map: defaultdict = defaultdict(set)

    # class used for the stored objects
    self._path_class: Any = Path
def test_multiedges():
    """Test adding parallel edges to a collection with multiedges enabled."""
    a, b, c, d = (Node(uid) for uid in 'abcd')

    e1 = Edge(a, b, uid='a-b')
    e2 = Edge(a, b, uid='e2')   # same node pair as e1, different uid
    e3 = Edge(c, d, uid='a-b')  # same uid as e1, different node pair

    simple = EdgeCollection()
    simple.add(e1)

    # NOTE: the checks that the plain collection rejects e2 and e3 are
    # currently disabled.
    # with pytest.raises(Exception):
    #     simple.add(e2)
    # with pytest.raises(Exception):
    #     simple.add(e3)

    # with multiedges enabled both edges between a and b are added
    multi = EdgeCollection(multiedges=True)
    multi.add(e1)
    multi.add(e2)
def __init__(self,
             uid: Optional[str] = None,
             directed: bool = True,
             multiedges: bool = False,
             **kwargs: Any) -> None:
    """Initialize the network object.

    Parameters
    ----------
    uid : Optional[str]
        Identifier forwarded to the base class.
    directed : bool
        Stored as the directed/undirected indicator and passed to the
        edge collection.
    multiedges : bool
        Stored as the multi-edge indicator and passed to the edge
        collection.
    **kwargs : Any
        Forwarded to the base class and added to ``self.attributes``.
    """
    super().__init__(uid=uid, **kwargs)

    # configuration flags
    self._directed: bool = directed
    self._multiedges: bool = multiedges

    # container for the network properties; without a default factory
    # this behaves like a plain dict for missing keys
    self._properties: defaultdict = defaultdict()

    # node container, shared with the edge container below
    node_store = NodeCollection()
    self._nodes: NodeCollection = node_store
    self._edges: EdgeCollection = EdgeCollection(directed=directed,
                                                 multiedges=multiedges,
                                                 nodes=node_store)

    # add attributes to the network
    self.attributes.update(**kwargs)

    # network properties, created in a fixed order
    self._properties['edges'] = set()
    for set_valued in ('successors', 'predecessors', 'outgoing',
                       'incoming', 'neighbors', 'incident_edges'):
        self._properties[set_valued] = defaultdict(set)
    for float_valued in ('indegrees', 'outdegrees', 'degrees'):
        self._properties[float_valued] = defaultdict(float)
def test_EdgeCollection_multiedges():
    """Test the EdgeCollection with multiedges enabled."""
    collection = EdgeCollection(multiedges=True)
    assert len(collection) == 0

    source, target = Node('a'), Node('b')
    first = Edge(source, target, uid='a-b')

    # one edge given as object, a second one between the same nodes by pair
    collection.add(first)
    collection.add(source, target, uid='new')
    assert len(collection) == 2

    # lookup by uid gives the edge itself
    assert collection['a-b'] == first

    # lookup by node pair (uids or objects) gives both parallel edges
    for pair in (('a', 'b'), (source, target)):
        assert len(collection[pair]) == 2
def _(self, data: PathCollection, order: Optional[int] = None,
      subpaths: bool = True) -> None:
    """Build the nodes and edges of the given order from a set of paths.

    Parameters
    ----------
    data : PathCollection
        Paths to process. Each path's ``'frequency'`` attribute
        (1 if missing) is used as its weight.
    order : Optional[int]
        Order to fit. When given, it overwrites the stored order.
    subpaths : bool
        If ``True``, ``self._subpaths`` is set from ``data`` via
        ``SubPathCollection.from_paths`` with ``max_length=order``.
    """
    if order is not None:
        self._order = order
    order = self.order

    # TODO: create function to transfer base data from PathCollection object
    # --- START ---
    nc = NodeCollection()
    for node in data.nodes.values():
        nc.add(node)

    ec = EdgeCollection(nodes=nc)
    for edge in data.edges.values():
        ec.add(edge)

    self._nodes = HigherOrderNodeCollection(nodes=nc, edges=ec)
    # --- END ---

    # iterate over all paths
    for path in data:
        # get frequency of the observed path
        # TODO: define keyword in config file
        frequency = path.attributes.get('frequency', 1)

        # higher-order nodes visited by this path, in order
        nodes: list = []

        if order == 0:
            # create missing nodes first, then count every occurrence
            for node in path.nodes:
                if (node, ) not in self.nodes:
                    self.add_node(node, frequency=0.0)
            for node in path.nodes:
                self.nodes[(node, )]['frequency'] += frequency
        elif order == 1:
            nodes = [(n, ) for n in path.nodes]
        elif 1 < order <= len(path):
            nodes.extend(self.window(path.edges, size=order - 1))
        elif order == len(path) + 1:
            # the whole path is registered as one node; no edges result
            whole_path = tuple(path.edges)
            if whole_path not in self.nodes:
                self.nodes.add(whole_path)
        # any other order contributes nothing for this path

        # connect consecutive higher-order nodes, creating what is missing
        _edges = []
        for _v, _w in zip(nodes[:-1], nodes[1:]):
            if _v not in self.nodes:
                self.nodes.add(_v)
            if _w not in self.nodes:
                self.nodes.add(_w)

            _nodes = (self.nodes[_v], self.nodes[_w])
            if _nodes not in self.edges:
                self.add_edge(*_nodes, possible=0, observed=0, frequency=0)
            _edges.append(self.edges[_nodes])

        # 'observed' is counted only when the order equals len(path),
        # otherwise the weight goes to 'possible'
        for edge in _edges:
            edge['frequency'] += frequency
            if order == len(path):
                edge['observed'] += frequency
            else:
                edge['possible'] += frequency

    if order == 0:
        # normalise node frequencies; the total is computed once from the
        # pre-normalisation values instead of on every iteration
        frequencies = [n['frequency'] for n in self.nodes]
        total = sum(frequencies)
        for node in self.nodes:
            node['frequency'] = node['frequency'] / total

    if subpaths:
        self._subpaths = SubPathCollection.from_paths(
            data, max_length=order, include_path=True)
def test_EdgeCollection():
    """Test the EdgeCollection"""
    edges = EdgeCollection()
    assert len(edges) == 0

    a = Node('a')
    b = Node('b')
    ab = Edge(a, b, uid='a-b')

    edges.add(ab)

    # adding the same edge twice must fail
    with pytest.raises(Exception):
        edges.add(ab)

    # access to the stored edge
    assert len(edges) == 1
    assert edges['a-b'] == ab
    assert edges[ab] == ab
    assert 'a-b' in edges
    assert ab in edges
    assert 'a-b' in edges.uids
    assert 'a-b' in edges.keys()
    assert ab in edges.values()
    assert ('a-b', ab) in edges.items()
    assert {'a-b': ab} == edges.dict

    # access to the nodes of the stored edge
    assert len(edges.nodes) == 2
    assert edges.nodes['a'] == a
    assert edges.nodes[a] == a
    assert 'a' in edges.nodes
    assert a in edges.nodes
    assert 'a' in edges.nodes.uids
    assert 'a' in edges.nodes.keys()
    assert a in edges.nodes.values()
    assert ('a', a) in edges.nodes.items()
    assert {'a': a, 'b': b} == edges.nodes.dict

    # `(a)` is not a tuple, so this passes a single bare node
    with pytest.raises(Exception):
        edges.add((a))

    c = Node('c')
    d = Node('d')

    edges.add(c, d, uid='c-d')
    assert len(edges) == 2
    assert edges['c-d'].v == c

    edges.add('e', 'f', uid='e-f')
    assert len(edges) == 3
    # was `'e' and 'f' in edges.nodes`, which only tested 'f'
    assert 'e' in edges.nodes and 'f' in edges.nodes

    for _e in [('f', 'g'), ('g', 'h')]:
        edges.add(_e)
    assert len(edges) == 5

    # with nodes=False the single string is taken as an edge uid
    edges.add('e', nodes=False)
    assert len(edges) == 6
    assert 'e' in edges
    assert isinstance(edges['e'].v, Node)
    assert isinstance(edges['e'].w, Node)
    assert len(edges.nodes) == 10

    _v = edges['e'].v.uid
    _w = edges['e'].w.uid

    edges.remove('e')
    assert len(edges) == 5
    assert 'e' not in edges

    # edges._remove_node(_v)
    # edges._remove_node(_w)
    # assert len(edges.nodes) == 8

    edges.remove('g', 'h')
    edges.remove(('f', 'g'))
    assert len(edges) == 3

    # NOTE(review): two identifiers are passed but the length only drops
    # by one -- confirm this is the intended behaviour of remove().
    edges.remove(ab, 'c-d')
    assert len(edges) == 2
    assert len(edges.nodes) == 10

    # the same node pair cannot be added twice
    edges = EdgeCollection()
    edges.add('a', 'b')
    with pytest.raises(Exception):
        edges.add('a', 'b')

    # removal by uid
    edges = EdgeCollection()
    edges.add('a', 'b', uid='e1')
    edges.add('b', 'c', uid='e2')
    edges.add('c', 'd', uid='e3')
    edges.add('d', 'e', uid='e4')
    assert len(edges) == 4

    edges.remove('e1')
    assert len(edges) == 3

    for _e in ['e2', 'e3']:
        edges.remove(_e)
    assert len(edges) == 1
def read_pathcollection(filename: str, separator: str = ',',
                        frequency: bool = False, directed: bool = True,
                        maxlines: int = None) -> PathCollection:
    """Read a collection of paths from a delimited text file.

    Every line describes one path as a sequence of node uids separated by
    ``separator``, e.g. ``a,b,c``. If ``frequency`` is ``True`` the last
    field of each line is the frequency of the path, e.g. ``a,b,c,2``.
    A line with a single node yields a path made of that node only.
    If the same node sequence occurs more than once, only the first
    occurrence (and its frequency) is kept.

    Parameters
    ----------
    filename : str
        path to the file
    separator : str
        character separating the fields of a line
    frequency : bool
        is a frequency given? if ``True`` it is the last field of the
        line and is parsed as a float; otherwise the frequency is 1.0
    directed : bool
        passed on to the resulting ``PathCollection``
    maxlines : int
        number of lines to read (useful to test large files).
        None means the entire file is read

    Returns
    -------
    PathCollection
        collection containing the nodes, edges and paths that were read
    """
    from pathpy.core.path import Path, PathCollection

    nodes: dict = {}
    edges: dict = {}
    paths: dict = {}

    with open(filename, 'r') as handle:
        for n, line in enumerate(handle):
            # stop *before* processing line number `maxlines`, so that
            # exactly `maxlines` lines are read (the check used to run
            # after processing and read one line too many)
            if maxlines is not None and n >= maxlines:
                break

            # NOTE(review): `split` always returns at least one field, so
            # blank lines are not rejected here; they produce a node with
            # an empty uid (or a float() error if `frequency` is set).
            fields = line.rstrip().split(separator)

            if frequency:
                path = tuple(fields[:-1])
                freq = float(fields[-1])
            else:
                path = tuple(fields)
                freq = 1.0

            for node in path:
                if node not in nodes:
                    nodes[node] = Node(node)

            # a repeated path adds nothing new: its nodes and edges were
            # already created when it was first seen
            if path in paths:
                continue

            if len(path) == 1:
                paths[path] = Path(nodes[path[0]], frequency=freq)
            else:
                edge_list = []
                for u, v in zip(path[:-1], path[1:]):
                    if (u, v) not in edges:
                        edges[(u, v)] = Edge(nodes[u], nodes[v])
                    edge_list.append(edges[(u, v)])
                paths[path] = Path(*edge_list, frequency=freq)

    # assemble the collections from the objects gathered above
    ncoll = NodeCollection()
    for node in nodes.values():
        ncoll.add(node)

    ecoll = EdgeCollection(nodes=ncoll)
    for edge in edges.values():
        ecoll._add(edge)

    _paths = PathCollection(directed=directed, nodes=ncoll, edges=ecoll)
    for _path in paths.values():
        _paths._add(_path)

    return _paths
def test_EdgeCollection():
    """Test the EdgeCollection"""
    edges = EdgeCollection(color='green')
    assert len(edges) == 0

    a = Node('a')
    b = Node('b')
    ab = Edge(a, b, uid='a-b')

    edges.add(ab)
    # with pytest.raises(Exception):
    #     edges.add(ab)

    # access to the stored edge
    assert len(edges) == 1
    assert edges['a-b'] == ab
    assert edges[ab] == ab
    assert 'a-b' in edges
    assert ab in edges
    assert 'a-b' in edges.uids
    assert 'a-b' in edges.keys()
    assert ab in edges.values()
    assert ('a-b', ab) in edges.items()

    # access to the nodes of the stored edge
    assert len(edges.nodes) == 2
    assert edges.nodes['a'] == a
    assert edges.nodes[a.uid] == a
    assert 'a' in edges.nodes
    # assert 'a' in edges.nodes.uids
    assert 'a' in edges.nodes.keys()
    assert a in edges.nodes.values()
    # assert ('a', a) in edges.nodes.items()

    # with pytest.raises(Exception):
    #     edges.add((a))

    c = Node('c')
    d = Node('d')

    edges.add(c, d, uid='c-d')
    assert len(edges) == 2
    assert edges['c-d'].v.uid == 'c'

    edges.add('e', 'f', uid='e-f')
    assert len(edges) == 3
    # was `'e' and 'f' in edges.nodes`, which only tested 'f'
    assert 'e' in edges.nodes and 'f' in edges.nodes

    for _e in [('f', 'g'), ('g', 'h')]:
        edges.add(_e)
    assert len(edges) == 5

    # edges.add('e', nodes=False)
    # assert len(edges) == 6
    # assert 'e' in edges
    # assert isinstance(edges['e'].v, Node)
    # assert isinstance(edges['e'].w, Node)
    # assert len(edges.nodes) == 10

    # _v = edges['e'].v.uid
    # _w = edges['e'].w.uid

    # edges.remove('e')
    # assert len(edges) == 5
    # assert 'e' not in edges

    # # edges._remove_node(_v)
    # # edges._remove_node(_w)
    # # assert len(edges.nodes) == 8

    edges.remove('g', 'h')
    edges.remove(('f', 'g'))
    assert len(edges) == 3

    edges.remove(ab)
    edges.remove('c-d')
    assert len(edges) == 1
    # assert len(edges.nodes) == 10

    edges = EdgeCollection()
    edges.add('a', 'b')
    # with pytest.raises(Exception):
    #     edges.add('a', 'b')

    # removal by uid
    edges = EdgeCollection()
    edges.add('a', 'b', uid='e1')
    edges.add('b', 'c', uid='e2')
    edges.add('c', 'd', uid='e3')
    edges.add('d', 'e', uid='e4')
    assert len(edges) == 4

    edges.remove('e1')
    assert len(edges) == 3

    for _e in ['e2', 'e3']:
        edges.remove(_e)
    assert len(edges) == 1
def test_EdgeCollection_for_HyperEdges():
    """Test the EdgeCollection with hyperedges."""
    a = Node('a')
    b = Node('b')
    c = Node('c')
    d = Node('d')

    e = HyperEdge({a, b}, {c, d}, uid='ab-cd')

    # a collection without hyperedge support must reject hyperedges,
    # both as an object and as node sets
    edges = EdgeCollection(hyperedges=False)
    with pytest.raises(Exception):
        edges.add(e)
    with pytest.raises(Exception):
        edges.add({a, b}, {c, d})

    edges = EdgeCollection(hyperedges=True)
    edges.add(e)

    assert len(edges) == 1
    assert e in edges
    assert len(edges.nodes) == 4
    # was `a and b and c and d in edges.nodes`, which only tested `d`
    assert all(node in edges.nodes for node in (a, b, c, d))

    # membership and lookup by node-uid sets, in every written order
    assert ({'a', 'b'}, {'c', 'd'}) in edges
    assert ({'b', 'a'}, {'c', 'd'}) in edges
    assert ({'a', 'b'}, {'d', 'c'}) in edges
    assert ({'b', 'a'}, {'d', 'c'}) in edges

    assert edges[{'a', 'b'}, {'c', 'd'}] == e
    assert edges[{'b', 'a'}, {'c', 'd'}] == e
    assert edges[{'a', 'b'}, {'d', 'c'}] == e
    assert edges[{'b', 'a'}, {'d', 'c'}] == e

    # node sets may mix Node objects and uids
    edges.add({a, 'c'}, {'b', d}, uid='ac-bd')

    assert len(edges) == 2
    assert ({'c', 'a'}, {'b', 'd'}) in edges

    # removal by node sets and by uid
    edges.remove({'a', 'b'}, {'c', 'd'})
    assert len(edges) == 1
    assert 'ab-cd' not in edges

    edges.remove('ac-bd')
    assert len(edges) == 0
def read_file(cls, filename: str, separator: str = ',',
              frequency: bool = False, directed: bool = True,
              maxlines: int = None) -> 'PathCollection':
    """Read a collection of paths from a delimited text file.

    Every line describes one path as a sequence of node uids separated by
    ``separator``, e.g. ``a,b,c``. If ``frequency`` is ``True`` the last
    field of each line is the frequency of the path, e.g. ``a,b,c,2``.
    If the same node sequence occurs more than once, only the first
    occurrence (and its frequency) is kept.

    Parameters
    ----------
    filename : str
        path to the file
    separator : str
        character separating the fields of a line
    frequency : bool
        is a frequency given? if ``True`` it is the last field of the
        line and is parsed as an int; otherwise the frequency is 1
    directed : bool
        are the edges directed or undirected
        NOTE(review): this argument is currently not used by the
        function body -- confirm whether it should be passed on to
        ``PathCollection``.
    maxlines : int
        number of lines to read (useful to test large files).
        None means the entire file is read

    Returns
    -------
    PathCollection
        collection containing the nodes, edges and paths that were read
    """
    nodes = {}
    edges = {}
    paths = {}

    with open(filename, 'r') as handle:
        for n, line in enumerate(handle):
            # stop *before* processing line number `maxlines`, so that
            # exactly `maxlines` lines are read (the check used to run
            # after processing and read one line too many)
            if maxlines is not None and n >= maxlines:
                break

            fields = line.rstrip().split(separator)
            assert len(fields) >= 2, 'Error: malformed line: {0}'.format(
                line)

            # the frequency gets its own name; it used to be stored in
            # `f`, shadowing the open file handle
            if frequency:
                path = tuple(fields[:-1])
                freq = int(fields[-1])
            else:
                path = tuple(fields)
                freq = 1

            for node in path:
                if node not in nodes:
                    nodes[node] = Node(node)

            # one edge per consecutive node pair, shared between paths
            edge_list = []
            for u, v in zip(path[:-1], path[1:]):
                if (u, v) not in edges:
                    edges[(u, v)] = Edge(nodes[u], nodes[v], uid=u + '-' + v)
                edge_list.append(edges[(u, v)])

            if path not in paths:
                paths[path] = Path(*edge_list, frequency=freq)

    # assemble the collections from the objects gathered above
    nc = NodeCollection()
    nc.add(*nodes.values())

    ec = EdgeCollection(nodes=nc)
    for edge in edges.values():
        ec._add(edge)

    p = PathCollection(nodes=nc, edges=ec)
    for path in paths.values():
        p._add(path)

    return p