Example #1
0
    def _(self, data: PathCollection, order: Optional[int] = None) -> None:
        """Fit the null model to the paths of a ``PathCollection``.

        Orders 0 and 1 are delegated to the parent class. For higher orders
        the nodes and edges of ``data`` are copied into fresh collections, a
        network is built from the paths and ``calculate`` is run on both.

        Parameters
        ----------
        data : PathCollection
            Observed paths the model is fitted to.
        order : Optional[int]
            If given, replaces the order stored on the model before fitting.

        Raises
        ------
        AttributeError
            If the order is neither within ``[0, 1]`` nor greater than 1.
        """
        # an explicitly passed order overrides the stored one
        if order is not None:
            self._order = order

        # low orders are handled entirely by the base implementation
        if 0 <= self.order <= 1:
            super().fit(data, order=self.order)
            return

        # whatever is not a higher order at this point is unsupported
        if not self.order > 1:
            LOG.error('A Null Model with order %s is not supported',
                      self.order)
            raise AttributeError

        # copy the base nodes and edges into collections owned by the model
        base_nodes = NodeCollection()
        for base_node in data.nodes.values():
            base_nodes.add(base_node)

        base_edges = EdgeCollection(nodes=base_nodes)
        for base_edge in data.edges.values():
            base_edges.add(base_edge)

        self._nodes = HigherOrderNodeCollection(nodes=base_nodes,
                                                edges=base_edges)

        # network representation of the observed paths
        first_order = Network.from_paths(data, frequencies=True)

        self.calculate(first_order, data)
Example #2
0
def test_EdgeCollection_undirected():
    """Test that an undirected edge collection ignores the node order."""

    edges = EdgeCollection(directed=False)
    edges.add('a', 'b')
    # the reversed pair must not create a second edge
    edges.add('b', 'a')
    assert len(edges) == 1

    # identity check instead of ``== False`` (PEP 8)
    assert edges['a', 'b'].directed is False

    # the edge is found under both orientations
    assert ('a', 'b') in edges
    assert ('b', 'a') in edges
Example #3
0
    def _(self, data: Network, order: Optional[int] = None) -> None:
        """Fit the null model to a ``Network``.

        Orders 0 and 1 are delegated to the parent class. For higher orders
        the nodes and edges of ``data`` are copied into fresh collections,
        every edge of the network is added as a path to a new
        ``PathCollection`` and ``calculate`` is run on network and paths.

        Parameters
        ----------
        data : Network
            Network the model is fitted to.
        order : Optional[int]
            If given, replaces the order stored on the model before fitting.

        Raises
        ------
        AttributeError
            If the order is neither within ``[0, 1]`` nor greater than 1.
        """
        # an explicitly passed order overrides the stored one
        if order is not None:
            self._order = order

        # low orders are handled entirely by the base implementation
        if 0 <= self.order <= 1:
            super().fit(data, order=self.order)
            return

        # whatever is not a higher order at this point is unsupported
        if not self.order > 1:
            LOG.error('A Null Model with order %s is not supported',
                      self.order)
            raise AttributeError

        # TODO: create function to transfer base data from PathCollection object
        base_nodes = NodeCollection()
        for base_node in data.nodes.values():
            base_nodes.add(base_node)

        base_edges = EdgeCollection(nodes=base_nodes)
        for base_edge in data.edges.values():
            base_edges.add(base_edge)

        self._nodes = HigherOrderNodeCollection(nodes=base_nodes,
                                                edges=base_edges)

        # path view of the network: one path per edge, carrying the edge's
        # 'frequency' attribute (1 when the attribute is missing)
        observed = PathCollection(directed=data.directed,
                                  nodes=data.nodes,
                                  edges=data.edges)
        for edge in data.edges:
            observed.add(edge, frequency=edge.attributes.get('frequency', 1))

        self.calculate(data, observed)
Example #4
0
    def __init__(self, directed: bool = True,
                 multiedges: bool = False,
                 multipaths: bool = False,
                 nodes: Optional[NodeCollection] = None,
                 edges: Optional[EdgeCollection] = None) -> None:
        """Initialize the collection together with its lookup tables.

        Parameters
        ----------
        directed : bool
            Stored as the directedness flag and forwarded to the default
            edge collection.
        multiedges : bool
            Stored as the multi-edge flag and forwarded to the default
            edge collection.
        multipaths : bool
            Stored as the multi-path flag.
        nodes : Optional[NodeCollection]
            Node collection to use. If omitted, the nodes of ``edges`` are
            used when ``edges`` is given, otherwise an empty collection.
        edges : Optional[EdgeCollection]
            Edge collection to use. If omitted, an empty one bound to the
            chosen node collection is used.
        """
        # pylint: disable=too-many-arguments

        # set up the base class first
        super().__init__()

        # configuration flags
        self._directed: bool = directed
        self._multiedges: bool = multiedges
        self._multipaths: bool = multipaths

        # node collection: explicit nodes win, then the nodes of the given
        # edges, then a fresh empty collection
        fresh_nodes = NodeCollection()
        if nodes is not None:
            self._nodes: NodeCollection = nodes
        elif edges is not None:
            self._nodes = edges.nodes
        else:
            self._nodes = fresh_nodes

        # edge collection: explicit edges win over a fresh collection that
        # shares the node collection chosen above
        fresh_edges = EdgeCollection(directed=directed,
                                     multiedges=multiedges,
                                     nodes=self._nodes)
        self._edges: EdgeCollection = fresh_edges if edges is None else edges

        # lookups from node tuples / edge tuples to paths
        self._nodes_map: defaultdict = defaultdict(PathSet)
        self._edges_map: defaultdict = defaultdict(PathSet)

        # lookups from a single node / a single edge to paths
        self._node_map: defaultdict = defaultdict(set)
        self._edge_map: defaultdict = defaultdict(set)

        # class used for the objects stored in this collection
        self._path_class: Any = Path
Example #5
0
def test_multiedges():
    """Add parallel edges to collections with and without ``multiedges``."""
    a, b, c, d = (Node(uid) for uid in ('a', 'b', 'c', 'd'))

    # e2 connects the same nodes as e1; e3 reuses the uid of e1
    e1 = Edge(a, b, uid='a-b')
    e2 = Edge(a, b, uid='e2')
    e3 = Edge(c, d, uid='a-b')

    # default collection
    collection = EdgeCollection()
    collection.add(e1)

    # NOTE: the rejection checks below are currently disabled.
    # with pytest.raises(Exception):
    #     collection.add(e2)
    # with pytest.raises(Exception):
    #     collection.add(e3)

    # with multiedges enabled both parallel edges are added
    collection = EdgeCollection(multiedges=True)
    for parallel_edge in (e1, e2):
        collection.add(parallel_edge)
Example #6
0
    def __init__(self,
                 uid: Optional[str] = None,
                 directed: bool = True,
                 multiedges: bool = False,
                 **kwargs: Any) -> None:
        """Initialize the network object.

        Parameters
        ----------
        uid : Optional[str]
            Identifier forwarded to the base class.
        directed : bool
            Stored as the directedness flag and forwarded to the edge
            collection.
        multiedges : bool
            Stored as the multi-edge flag and forwarded to the edge
            collection.
        **kwargs : Any
            Forwarded to the base class and merged into ``attributes``.
        """
        # set up the base class first
        super().__init__(uid=uid, **kwargs)

        # configuration flags
        self._directed: bool = directed
        self._multiedges: bool = multiedges

        # container for the network properties (no default factory)
        self._properties: defaultdict = defaultdict()

        # node container, shared with the edge container below
        self._nodes: NodeCollection = NodeCollection()
        self._edges: EdgeCollection = EdgeCollection(directed=directed,
                                                     multiedges=multiedges,
                                                     nodes=self._nodes)

        # keyword arguments become attributes of the network
        self.attributes.update(**kwargs)

        # network properties: one plain set, then per-key sets, then
        # per-key float counters
        self._properties['edges'] = set()
        for set_valued in ('successors', 'predecessors', 'outgoing',
                           'incoming', 'neighbors', 'incident_edges'):
            self._properties[set_valued] = defaultdict(set)
        for float_valued in ('indegrees', 'outdegrees', 'degrees'):
            self._properties[float_valued] = defaultdict(float)
Example #7
0
def test_EdgeCollection_multiedges():
    """Test an EdgeCollection that allows parallel edges."""
    collection = EdgeCollection(multiedges=True)
    assert len(collection) == 0

    source = Node('a')
    target = Node('b')
    first_edge = Edge(source, target, uid='a-b')

    # add one edge as an object and a parallel one from its end points
    collection.add(first_edge)
    collection.add(source, target, uid='new')

    assert len(collection) == 2

    # lookup by uid returns the single edge
    assert collection['a-b'] == first_edge

    # lookup by node pair (uids or objects) returns both parallel edges
    for node_pair in (('a', 'b'), (source, target)):
        assert len(collection[node_pair]) == 2
    def _(self,
          data: PathCollection,
          order: Optional[int] = None,
          subpaths: bool = True) -> None:
        """Fit the higher-order model to a collection of observed paths.

        The nodes and edges of ``data`` are copied into fresh collections
        that back the higher-order node collection. Every path is then
        processed according to the model order:

        * order 0: one node per path node; its 'frequency' accumulates the
          path frequency and is normalised to a share of the total at the
          end,
        * order 1: the path nodes, each wrapped in a 1-tuple,
        * ``1 < order <= len(path)``: the windows of ``order - 1`` edges
          returned by ``self.window``,
        * ``order == len(path) + 1``: the whole edge tuple is added as a
          single node.

        Consecutive higher-order nodes are connected by edges whose
        'frequency' grows by the path frequency; 'observed' grows as well
        when ``order == len(path)``, otherwise 'possible' does.

        Parameters
        ----------
        data : PathCollection
            Observed paths.
        order : Optional[int]
            If given, replaces the order stored on the model before fitting.
        subpaths : bool
            If true, ``self._subpaths`` is rebuilt from ``data`` with
            ``max_length=order`` and ``include_path=True``.
        """
        if order is not None:
            self._order = order

        order = self.order

        # TODO: create function to transfer base data from PathCollection object
        # --- START ---
        nc = NodeCollection()
        for node in data.nodes.values():
            nc.add(node)

        ec = EdgeCollection(nodes=nc)
        for edge in data.edges.values():
            ec.add(edge)

        self._nodes = HigherOrderNodeCollection(nodes=nc, edges=ec)
        # --- END ---

        # iterate over all paths
        for path in data:

            # get frequency of the observed path
            # TODO: define keyword in config file
            frequency = path.attributes.get('frequency', 1)

            # higher-order nodes visited by this path, in order
            nodes: list = []
            if order == 0:
                # register every node first, then count the visits
                for node in path.nodes:
                    if (node, ) not in self.nodes:
                        self.add_node(node, frequency=0.0)

                for node in path.nodes:
                    self.nodes[(node, )]['frequency'] += frequency

            elif order == 1:
                nodes.extend((n, ) for n in path.nodes)

            elif 1 < order <= len(path):
                nodes.extend(self.window(path.edges, size=order - 1))

            elif order == len(path) + 1:
                if tuple(path.edges) not in self.nodes:
                    self.nodes.add(tuple(path.edges))

            # connect consecutive higher-order nodes, creating missing
            # nodes and edges on the way
            _edges = []
            for _v, _w in zip(nodes[:-1], nodes[1:]):

                if _v not in self.nodes:
                    self.nodes.add(_v)

                if _w not in self.nodes:
                    self.nodes.add(_w)

                _nodes = (self.nodes[_v], self.nodes[_w])
                if _nodes not in self.edges:
                    self.add_edge(*_nodes, possible=0, observed=0, frequency=0)

                _edges.append(self.edges[_nodes])

            # update the counters of every edge traversed by this path
            for edge in _edges:
                edge['frequency'] += frequency
                if order == len(path):
                    edge['observed'] += frequency
                else:
                    edge['possible'] += frequency

        if order == 0:
            # the total is invariant during normalisation, so compute it
            # once instead of once per node
            total = sum(n['frequency'] for n in self.nodes)
            for node in self.nodes:
                node['frequency'] = node['frequency'] / total

        if subpaths:
            self._subpaths = SubPathCollection.from_paths(data,
                                                          max_length=order,
                                                          include_path=True)
Example #9
0
def test_EdgeCollection():
    """Test adding, looking up and removing edges in an EdgeCollection."""
    edges = EdgeCollection()

    assert len(edges) == 0

    a = Node('a')
    b = Node('b')
    ab = Edge(a, b, uid='a-b')

    # the same edge cannot be added twice
    edges.add(ab)
    with pytest.raises(Exception):
        edges.add(ab)

    # the edge is reachable by uid and by object through every view
    assert len(edges) == 1
    assert edges['a-b'] == ab
    assert edges[ab] == ab
    assert 'a-b' in edges
    assert ab in edges
    assert 'a-b' in edges.uids
    assert 'a-b' in edges.keys()
    assert ab in edges.values()
    assert ('a-b', ab) in edges.items()
    assert {'a-b': ab} == edges.dict

    # the end points of the edge are tracked in the node collection
    assert len(edges.nodes) == 2
    assert edges.nodes['a'] == a
    assert edges.nodes[a] == a
    assert 'a' in edges.nodes
    assert a in edges.nodes
    assert 'a' in edges.nodes.uids
    assert 'a' in edges.nodes.keys()
    assert a in edges.nodes.values()
    assert ('a', a) in edges.nodes.items()
    assert {'a': a, 'b': b} == edges.nodes.dict

    # a single node is not a valid edge
    with pytest.raises(Exception):
        edges.add(a)

    c = Node('c')
    d = Node('d')

    # add an edge from node objects
    edges.add(c, d, uid='c-d')

    assert len(edges) == 2
    assert edges['c-d'].v == c

    # add an edge from node uids
    edges.add('e', 'f', uid='e-f')

    assert len(edges) == 3
    # check both end points; ``'e' and 'f' in edges.nodes`` only tested 'f'
    assert 'e' in edges.nodes and 'f' in edges.nodes

    # add edges given as tuples of node uids
    for _e in [('f', 'g'), ('g', 'h')]:
        edges.add(_e)

    assert len(edges) == 5

    # with nodes=False the argument is used as the edge uid
    edges.add('e', nodes=False)

    assert len(edges) == 6
    assert 'e' in edges
    assert isinstance(edges['e'].v, Node)
    assert isinstance(edges['e'].w, Node)
    assert len(edges.nodes) == 10

    # only referenced by the disabled node-removal checks below
    _v = edges['e'].v.uid
    _w = edges['e'].w.uid

    edges.remove('e')
    assert len(edges) == 5
    assert 'e' not in edges

    # edges._remove_node(_v)
    # edges._remove_node(_w)
    # assert len(edges.nodes) == 8

    # remove by node uids, either as separate arguments or as a tuple
    edges.remove('g', 'h')
    edges.remove(('f', 'g'))

    assert len(edges) == 3

    edges.remove(ab, 'c-d')
    assert len(edges) == 2
    # removing edges leaves the node collection untouched
    assert len(edges.nodes) == 10

    # a second edge between the same nodes is rejected
    edges = EdgeCollection()
    edges.add('a', 'b')
    with pytest.raises(Exception):
        edges.add('a', 'b')

    # removal by uid, one at a time
    edges = EdgeCollection()
    edges.add('a', 'b', uid='e1')
    edges.add('b', 'c', uid='e2')
    edges.add('c', 'd', uid='e3')
    edges.add('d', 'e', uid='e4')

    assert len(edges) == 4

    edges.remove('e1')
    assert len(edges) == 3

    for _e in ['e2', 'e3']:
        edges.remove(_e)

    assert len(edges) == 1
Example #10
0
def read_pathcollection(filename: str,
                        separator: str = ',',
                        frequency: bool = False,
                        directed: bool = True,
                        maxlines: int = None) -> PathCollection:
    """Read paths from a delimited text file.

    Every non-empty line describes one path as a sequence of node uids,
    e.g. "a,b,c". If ``frequency`` is true the last field of the line is
    the frequency of the path, e.g. "a,b,c,2". A line with a single node
    yields a path consisting of that node only. When the same node
    sequence occurs more than once, only its first occurrence is kept.
    Empty lines are skipped.

    Parameters
    ----------
    filename : str
        path to the input file
    separator : str
        character separating the fields of a line
    frequency : bool
        is a frequency given? if ``True`` it is the last element of the
        line (i.e. ``a,b,2``)
    directed : bool
        forwarded to the created ``PathCollection``
    maxlines : int
        number of lines to read (useful to test large files).
        None means the entire file is read

    Returns
    -------
    PathCollection
        collection holding the nodes, edges and paths found in the file

    """

    from pathpy.core.path import Path, PathCollection

    nodes: dict = {}
    edges: dict = {}
    paths: dict = {}

    with open(filename, 'r') as handle:
        for n, line in enumerate(handle):
            # n is zero-based: stop *before* handling line number maxlines
            # so that exactly maxlines lines are read
            if maxlines is not None and n >= maxlines:
                break

            # skip empty lines instead of creating a node with an empty uid
            content = line.rstrip()
            if not content:
                continue
            fields = content.split(separator)

            if frequency:
                path = tuple(fields[:-1])
                freq = float(fields[-1])
            else:
                path = tuple(fields)
                freq = 1.0

            # only the first occurrence of a node sequence is stored
            if path in paths:
                continue

            for node in path:
                if node not in nodes:
                    nodes[node] = Node(node)

            if len(path) == 1:
                # a single node forms a path without edges
                paths[path] = Path(nodes[path[0]], frequency=freq)
            else:
                # reuse one edge object per (source, target) pair
                edge_list = []
                for u, v in zip(path[:-1], path[1:]):
                    if (u, v) not in edges:
                        edges[(u, v)] = Edge(nodes[u], nodes[v])
                    edge_list.append(edges[(u, v)])

                paths[path] = Path(*edge_list, frequency=freq)

    # assemble the collections bottom-up: nodes, then edges, then paths
    ncoll = NodeCollection()
    for node in nodes.values():
        ncoll.add(node)

    ecoll = EdgeCollection(nodes=ncoll)
    for edge in edges.values():
        ecoll._add(edge)

    _paths = PathCollection(directed=directed, nodes=ncoll, edges=ecoll)

    for _path in paths.values():
        _paths._add(_path)

    return _paths
Example #11
0
def test_EdgeCollection():
    """Test adding, looking up and removing edges in an EdgeCollection."""
    edges = EdgeCollection(color='green')

    assert len(edges) == 0

    a = Node('a')
    b = Node('b')
    ab = Edge(a, b, uid='a-b')

    edges.add(ab)

    # with pytest.raises(Exception):
    #     edges.add(ab)

    # the edge is reachable by uid and by object through every view
    assert len(edges) == 1
    assert edges['a-b'] == ab
    assert edges[ab] == ab
    assert 'a-b' in edges
    assert ab in edges
    assert 'a-b' in edges.uids
    assert 'a-b' in edges.keys()
    assert ab in edges.values()
    assert ('a-b', ab) in edges.items()

    # the end points of the edge are tracked in the node collection
    assert len(edges.nodes) == 2
    assert edges.nodes['a'] == a
    assert edges.nodes[a.uid] == a
    assert 'a' in edges.nodes
    assert a in edges.nodes.values()
    # assert 'a' in edges.nodes.uids
    assert 'a' in edges.nodes.keys()
    # assert ('a', a) in edges.nodes.items()

    # with pytest.raises(Exception):
    #     edges.add((a))

    c = Node('c')
    d = Node('d')

    # add an edge from node objects
    edges.add(c, d, uid='c-d')

    assert len(edges) == 2
    assert edges['c-d'].v.uid == 'c'

    # add an edge from node uids
    edges.add('e', 'f', uid='e-f')

    assert len(edges) == 3
    # check both end points; ``'e' and 'f' in edges.nodes`` only tested 'f'
    assert 'e' in edges.nodes and 'f' in edges.nodes

    # add edges given as tuples of node uids
    for _e in [('f', 'g'), ('g', 'h')]:
        edges.add(_e)

    assert len(edges) == 5

    # edges.add('e', nodes=False)

    #     assert len(edges) == 6
    #     assert 'e' in edges
    #     assert isinstance(edges['e'].v, Node)
    #     assert isinstance(edges['e'].w, Node)
    #     assert len(edges.nodes) == 10

    #     _v = edges['e'].v.uid
    #     _w = edges['e'].w.uid

    #     edges.remove('e')
    #     assert len(edges) == 5
    #     assert 'e' not in edges

    #     # edges._remove_node(_v)
    #     # edges._remove_node(_w)
    #     # assert len(edges.nodes) == 8

    # remove by node uids, either as separate arguments or as a tuple
    edges.remove('g', 'h')
    edges.remove(('f', 'g'))

    assert len(edges) == 3

    # remove by edge object and by uid
    edges.remove(ab)
    edges.remove('c-d')
    assert len(edges) == 1
    # assert len(edges.nodes) == 10

    edges = EdgeCollection()
    edges.add('a', 'b')

    # with pytest.raises(Exception):
    #     edges.add('a', 'b')

    # removal by uid, one at a time
    edges = EdgeCollection()
    edges.add('a', 'b', uid='e1')
    edges.add('b', 'c', uid='e2')
    edges.add('c', 'd', uid='e3')
    edges.add('d', 'e', uid='e4')

    assert len(edges) == 4

    edges.remove('e1')
    assert len(edges) == 3

    for _e in ['e2', 'e3']:
        edges.remove(_e)

    assert len(edges) == 1
def test_EdgeCollection_for_HyperEdges():
    """Test the EdgeCollection with hyperedges."""

    a = Node('a')
    b = Node('b')
    c = Node('c')
    d = Node('d')

    e = HyperEdge({a, b}, {c, d}, uid='ab-cd')

    # a collection without hyperedge support rejects hyperedges, both as
    # an object and as node sets
    edges = EdgeCollection(hyperedges=False)

    with pytest.raises(Exception):
        edges.add(e)

    with pytest.raises(Exception):
        edges.add({a, b}, {c, d})

    edges = EdgeCollection(hyperedges=True)
    edges.add(e)

    assert len(edges) == 1
    assert e in edges
    assert len(edges.nodes) == 4
    # check every node; ``a and b and c and d in edges.nodes`` only tested
    # the membership of d
    assert all(node in edges.nodes for node in (a, b, c, d))

    # membership does not depend on the order inside the node sets
    assert ({'a', 'b'}, {'c', 'd'}) in edges
    assert ({'b', 'a'}, {'c', 'd'}) in edges
    assert ({'a', 'b'}, {'d', 'c'}) in edges
    assert ({'b', 'a'}, {'d', 'c'}) in edges

    # neither does the lookup
    assert edges[{'a', 'b'}, {'c', 'd'}] == e
    assert edges[{'b', 'a'}, {'c', 'd'}] == e
    assert edges[{'a', 'b'}, {'d', 'c'}] == e
    assert edges[{'b', 'a'}, {'d', 'c'}] == e

    # node objects and uids can be mixed when adding
    edges.add({a, 'c'}, {'b', d}, uid='ac-bd')

    assert len(edges) == 2
    assert ({'c', 'a'}, {'b', 'd'}) in edges

    # remove by node sets
    edges.remove({'a', 'b'}, {'c', 'd'})
    assert len(edges) == 1
    assert 'ab-cd' not in edges

    # remove by uid
    edges.remove('ac-bd')
    assert len(edges) == 0
Example #13
0
    def read_file(cls,
                  filename: str,
                  separator: str = ',',
                  frequency: bool = False,
                  directed: bool = True,
                  maxlines: int = None) -> 'PathCollection':
        """
        Read paths from a delimited text file.

        Every line describes one path as a sequence of at least two
        fields, e.g. "a,b,c". If ``frequency`` is true the last field of
        the line is the integer frequency of the path, e.g. "a,b,c,2".
        When the same node sequence occurs more than once, only its first
        occurrence is kept.

        Parameters
        ----------
        filename : str
            path to the input file
        separator : str
            character separating the fields of a line
        frequency : bool
            is a frequency given? if ``True`` it is the last element of
            the line (i.e. ``a,b,2``)
        directed : bool
            forwarded to the created ``PathCollection``
        maxlines : int
            number of lines to read (useful to test large files).
            None means the entire file is read

        Returns
        -------
        PathCollection
            collection holding the nodes, edges and paths found in the file

        Raises
        ------
        AssertionError
            If a line has fewer than two fields.
        """
        nodes = {}
        edges = {}
        paths = {}

        with open(filename, 'r') as handle:
            for n, line in enumerate(handle):
                # n is zero-based: stop *before* handling line number
                # maxlines so that exactly maxlines lines are read
                if maxlines is not None and n >= maxlines:
                    break

                fields = line.rstrip().split(separator)
                # NOTE: kept as an assert so the exception type raised for
                # malformed input stays the same
                assert len(fields) >= 2, 'Error: malformed line: {0}'.format(
                    line)

                # the frequency gets its own name so that it does not
                # rebind the name of the open file
                if frequency:
                    path = tuple(fields[:-1])
                    freq = int(fields[-1])
                else:
                    path = tuple(fields)
                    freq = 1

                for node in path:
                    if node not in nodes:
                        nodes[node] = Node(node)

                # reuse one edge object per (source, target) pair
                edge_list = []
                for u, v in zip(path[:-1], path[1:]):
                    if (u, v) not in edges:
                        edges[(u, v)] = Edge(nodes[u],
                                             nodes[v],
                                             uid=u + '-' + v)
                    edge_list.append(edges[(u, v)])

                # only the first occurrence of a node sequence is stored
                if path not in paths:
                    paths[path] = Path(*edge_list, frequency=freq)

        # assemble the collections bottom-up: nodes, then edges, then paths
        nc = NodeCollection()
        nc.add(*nodes.values())

        ec = EdgeCollection(nodes=nc)
        for edge in edges.values():
            ec._add(edge)

        # honour the documented ``directed`` argument
        p = PathCollection(directed=directed, nodes=nc, edges=ec)

        for path in paths.values():
            p._add(path)

        return p