Beispiel #1
0
    def __init__(self,
                 network: Network,
                 weight: Weight = None,
                 start_node: Optional[str] = None,
                 restart_prob=0) -> None:
        """Initialise a random walk process in a given start node.

        The time ``t`` of the walk is set to zero, the transition matrix and
        its stationary distribution are computed, and the walker is placed
        on the start node, which counts as the first visit.

        Parameters
        ----------
        network : Network

            Network in which the random walk is simulated.

        weight : Weight

            Passed unchanged to ``RandomWalk.transition_matrix``.

        start_node : Optional[str]

            Uid of the node the walk starts in. If omitted, a node is chosen
            uniformly at random. If the uid is not part of the network, a
            warning is logged and a random node is used instead.

        restart_prob

            Passed unchanged to ``RandomWalk.transition_matrix``.

        """
        # network in which the random walk is simulated
        self._network: Network = network

        # time of the random walk
        self._t: int = 0

        # transition matrix for the random walk
        self._transition_matrix = RandomWalk.transition_matrix(
            network, weight, restart_prob)

        # uids of the nodes
        self._node_uids: list = list(network.nodes.keys())

        # number of visits per node, addressed by node index
        self._visitations = np.ravel(
            np.zeros(shape=(1, network.number_of_nodes())))

        # path of the random walker
        # TODO: implement new path class
        # self._path = Path()

        # leading left eigenvector of the transition matrix
        if network.number_of_nodes() > 2:
            _, eigenvectors = spl.eigs(self._transition_matrix.transpose(),
                                       k=1,
                                       which='LM')
            pi = eigenvectors.reshape(eigenvectors.size, )
        else:
            # the sparse solver is not used for such small matrices, so fall
            # back to a dense eigen-decomposition
            eigenvals, eigenvectors = spla.eig(
                self._transition_matrix.transpose().toarray())
            # NOTE(review): assumes `spla` is scipy.linalg (numpy.linalg
            # behaves the same): `eig` returns the eigenvectors as COLUMNS,
            # column i belonging to eigenvalue i. Select the column of the
            # largest eigenvalue; indexing the rows with the sort order
            # would merely permute the entries of column 0.
            leading = np.argsort(-eigenvals)[0]
            pi = eigenvectors[:, leading]

        # stationary probabilities (eigenvector normalised to sum one)
        self._stationary_probabilities = np.real(pi / np.sum(pi))

        if start_node is not None and start_node in network.nodes:
            self._current_node = start_node
        else:
            if start_node is not None:
                LOG.warning('Invalid start node for random walk. '
                            'Picking random node.')
            self._current_node = np.random.choice(self._node_uids)

        # the start node counts as visited once
        self._visitations[network.nodes.index[self._current_node]] += 1
Beispiel #2
0
def ER_np_randomize(network: Network, loops: bool = False) -> Network:
    """Generate a random G(n,p) microstate that mimics a given network.

    The number of nodes, the edge directedness and the node uids are taken
    from `network`. The edge probability is chosen as ``m / M``, where ``m``
    is the number of edges of `network` and ``M`` the value returned by
    ``max_edges`` for the same number of nodes, directedness and `loops`
    setting.

    Parameters
    ----------
    network : Network

        Network whose size, directedness and node uids are reused.

    loops : bool

        Forwarded to ``max_edges`` and ``ER_np``.

    Returns
    -------
    Network

        The network returned by ``ER_np``.

    """
    n = network.number_of_nodes()
    m = network.number_of_edges()
    M = max_edges(n, directed=network.directed, loops=loops)

    # if no edge is possible at all (M == 0) the ratio is undefined; use
    # probability zero instead of failing with a ZeroDivisionError
    p = m / M if M > 0 else 0.0

    return ER_np(n=n,
                 p=p,
                 directed=network.directed,
                 loops=loops,
                 node_uids=list(network.nodes.uids))
Beispiel #3
0
def to_network(frame: pd.DataFrame,
               loops: bool = True,
               directed: bool = True,
               multiedges: bool = False,
               **kwargs: Any) -> Network:
    """Build a network from the edge list stored in a pandas data frame.

    Columns ``v`` and ``w`` hold the end points of each edge, an optional
    column ``uid`` the edge uid. All remaining columns of a row are passed
    to the ``Edge`` constructor as keyword arguments.

    Parameters
    ----------
    frame : pd.DataFrame

        Edge list; column names are first passed through
        ``_check_column_name`` with the configured synonyms.

    loops : bool

        If False, rows with ``v == w`` are skipped.

    directed : bool

        Directedness of the created network. For undirected networks the
        reversed node pair also counts as a duplicate.

    multiedges : bool

        If False, a repeated node pair is skipped with a warning.

    **kwargs : Any

        Forwarded to the ``Network`` constructor.

    Raises
    ------
    IOError

        If ``None`` occurs among the values of the ``v`` and ``w`` columns.

    """
    # if no v/w columns are included, pick first synonym
    frame = _check_column_name(frame, 'v', config['edge']['v_synonyms'])
    frame = _check_column_name(frame, 'w', config['edge']['w_synonyms'])

    LOG.debug('Creating %s network', directed)

    node_uids = set(frame['v']) | set(frame['w'])
    if None in node_uids:
        LOG.error('DataFrame minimally needs columns \'v\' and \'w\'')
        raise IOError

    nodes = {uid: Node(uid) for uid in node_uids}

    collected: list = []
    seen_pairs: set = set()

    # TODO: Make this for loop faster!
    for record in frame.to_dict(orient='records'):
        v = record.pop('v')
        w = record.pop('w')
        uid = record.pop('uid', None)

        # duplicates are only kept in multi-edge networks
        if (v, w) in seen_pairs and not multiedges:
            LOG.warning(
                'The edge (%s,%s) exist already '
                'and will not be considered. '
                'To capture this edge, please '
                'enalbe multiedges and/or directed!', v, w)
            continue

        # drop self-loops unless they are explicitly allowed
        if not (loops or v != w):
            continue

        # whatever is left in the record becomes an edge attribute
        collected.append(Edge(nodes[v], nodes[w], uid=uid, **record))
        seen_pairs.add((v, w))
        if not directed:
            seen_pairs.add((w, v))

    net = Network(directed=directed, multiedges=multiedges, **kwargs)
    for node in nodes.values():
        net.nodes.add(node)

    for edge in collected:
        net.edges._add(edge)

    net._add_edge_properties()
    return net
Beispiel #4
0
def Q_max_modularity(network: Network, cluster_mapping: Dict) -> float:
    """Compute the maximum theoretically possible Q-modularity.

    With ``m`` the number of edges and ``d`` the node degrees, the value is
    ``(2m - sum_{v,w in same cluster} d_v * d_w / (2m)) / (2m)``, where the
    sum runs over all ordered node pairs (including ``v == w``).

    Parameters
    ----------
    network : Network

        Network providing the nodes, their degrees and the edge count.

    cluster_mapping : Dict

        Maps every node uid to its cluster label.

    Returns
    -------
    float

        Maximum Q-modularity for the given cluster mapping.

    Raises
    ------
    ZeroDivisionError

        If the network has no edges.

    """
    m = network.number_of_edges()

    # fetch degrees and uids once instead of once per node pair
    degrees = network.degrees()
    uids = list(network.nodes.uids)

    qmax: float = 2 * m
    for v in uids:
        for w in uids:
            if cluster_mapping[v] == cluster_mapping[w]:
                qmax -= degrees[v] * degrees[w] / (2 * m)

    return qmax / (2 * m)
Beispiel #5
0
def Q_modularity(network: Network, cluster_mapping: Dict) -> float:
    """Compute the Q-modularity of a network for a given cluster mapping.

    With ``A`` the adjacency matrix, ``m`` the number of edges and ``d`` the
    node degrees, the value is
    ``sum_{v,w in same cluster} (A[v, w] - d_v * d_w / (2m)) / (2m)``,
    where the sum runs over all ordered node pairs (including ``v == w``).

    Parameters
    ----------
    network : Network

        Network providing adjacency matrix, node index, degrees and edge
        count.

    cluster_mapping : Dict

        Maps every node uid to its cluster label.

    Returns
    -------
    float

        Q-modularity of the cluster mapping.

    Raises
    ------
    ZeroDivisionError

        If the network has no edges.

    """
    A = network.adjacency_matrix()
    m = network.number_of_edges()

    # fetch degrees, node index and uids once instead of once per node pair
    degrees = network.degrees()
    index = network.nodes.index
    uids = list(network.nodes.uids)

    q = 0.0
    for v in uids:
        for w in uids:
            if cluster_mapping[v] == cluster_mapping[w]:
                q += A[index[v], index[w]] - \
                    degrees[v] * degrees[w] / (2 * m)
    return q / (2 * m)
Beispiel #6
0
def ER_nm_randomize(network: Network,
                    loops: bool = False,
                    multiedges: bool = False) -> Union[Network, None]:
    """Generate a randomized G(n,m) counterpart of a given network.

    The number of nodes, the number of edges, the edge directedness and the
    node uids of `network` are handed to ``ER_nm``; its result is returned
    unchanged.

    Parameters
    ----------
    network : pathpy.Network

        Given network used to determine number of nodes, edges, node uids,
        and edge directedness

    loops : bool

        Whether or not the generated network can contain loops.

    multiedges : bool

        Whether or not multiple edges can be added to the same node pair

    Examples
    --------
    Randomize a small undirected network

    >>> import pathpy as pp
    >>> n = pp.Network(directed=False)
    >>> n.add_edge('a', 'b')
    >>> n.add_edge('b', 'c')
    >>> n.add_edge('d', 'e')
    >>> r = ER_nm_randomize(n)

    """
    node_count = network.number_of_nodes()
    edge_count = network.number_of_edges()
    uids = list(network.nodes.uids)

    return ER_nm(node_count,
                 edge_count,
                 directed=network.directed,
                 loops=loops,
                 multiedges=multiedges,
                 node_uids=uids)
    def _(self, data: PathCollection, order: Optional[int] = None) -> None:
        """Fit the null model to a collection of paths.

        Orders 0 and 1 are delegated to the parent class. For higher orders
        the nodes and edges of `data` are copied into fresh collections, a
        first-order network is derived from the paths and ``calculate`` is
        invoked. Any other order is rejected.

        Parameters
        ----------
        data : PathCollection

            Paths the model is fitted to.

        order : Optional[int]

            If given, replaces the order stored on the model.

        Raises
        ------
        AttributeError

            If the order is neither in [0, 1] nor larger than 1.

        """
        # an explicitly passed order overrides the stored one
        if order is not None:
            self._order = order

        # low orders are handled by the parent implementation
        if 0 <= self.order <= 1:
            super().fit(data, order=self.order)
            return

        if not self.order > 1:
            LOG.error('A Null Model with order %s is not supported',
                      self.order)
            raise AttributeError

        # collect the first-order nodes and edges of the observed paths
        base_nodes = NodeCollection()
        for node in data.nodes.values():
            base_nodes.add(node)

        base_edges = EdgeCollection(nodes=base_nodes)
        for edge in data.edges.values():
            base_edges.add(edge)

        self._nodes = HigherOrderNodeCollection(nodes=base_nodes,
                                                edges=base_edges)

        # generate first order representation of data
        first_order = Network.from_paths(data, frequencies=True)

        self.calculate(first_order, data)
Beispiel #8
0
def single_source_shortest_paths(
        network: Network,
        source: str,
        weight: Union[bool, str, None] = None) -> tuple:
    """Calculate shortest paths from a single source node.

    Uses Dijkstra's algorithm with a binary heap as priority queue. Nodes
    with equal tentative distance are processed in the iteration order of
    ``network.nodes.uids``.

    Parameters
    ----------
    network : Network

        The network in which paths are searched.

    source : str

        Uid of the node all paths start in.

    weight : Union[bool, str, None]

        ``None`` or ``False``: every edge has cost 1. ``True``: the edge
        attribute ``'weight'`` is used as cost. Any other value is used as
        the name of the edge attribute holding the cost.

    Returns
    -------
    tuple

        ``(dist_arr, s_p)``. ``dist_arr`` is a numpy array holding the
        distance from `source` to every node, addressed by node index
        (``np.inf`` if no path was found). ``s_p`` maps the uid of every
        node other than `source` to one shortest path as a tuple of node
        uids, or to ``None`` if no path was found.

    """
    import heapq

    # Resolve the cost attribute once. Equality (not identity) comparisons
    # are kept on purpose so that e.g. numpy booleans behave as before.
    if weight == True:  # noqa: E712
        cost_attribute = 'weight'
    elif weight != False and weight != None:  # noqa: E711,E712
        cost_attribute = weight
    else:
        cost_attribute = None

    uids = list(network.nodes.uids)
    # position of each uid in the iteration order, used as tie-breaker
    order = {uid: position for position, uid in enumerate(uids)}

    dist: dict = {source: 0}
    prev: dict = dict()
    for v in uids:
        if v != source:
            dist[v] = np.inf
            prev[v] = None

    # heap entries are (tentative distance, tie-breaker, uid); outdated
    # entries are not removed but skipped when they surface
    heap = [(dist[v], order[v], v) for v in uids]
    heapq.heapify(heap)
    settled: set = set()

    while heap:
        _, _, u = heapq.heappop(heap)
        if u in settled:
            continue
        settled.add(u)

        for node in network.successors[u]:
            # for networks with no edge costs, edges have constant cost
            cost = 1
            if cost_attribute is not None:
                cost = list(
                    network.edges[u, node])[0].attributes[cost_attribute]

            new_dist = dist[u] + cost
            if new_dist < dist[node.uid]:
                dist[node.uid] = new_dist
                prev[node.uid] = u
                # only nodes that are still queued need a new queue entry
                if node.uid not in settled:
                    heapq.heappush(
                        heap, (new_dist, order[node.uid], node.uid))

    # calculate distance vector
    dist_arr = np.zeros(network.number_of_nodes())
    for v in network.nodes:
        dist_arr[network.nodes.index[v.uid]] = dist[v.uid]

    # construct shortest paths by walking the predecessors back to source
    s_p: dict = dict()
    for dest in network.nodes:
        if dest.uid == source:
            continue
        path = [dest.uid]
        x = dest.uid
        while x != source and x is not None:
            x = prev[x]
            path.append(x)
        if x is None:
            # the walk ended without reaching the source: no path
            s_p[dest.uid] = None
        else:
            path.reverse()
            s_p[dest.uid] = tuple(path)
    return dist_arr, s_p
def closeness_centrality(network: Network, normalized: bool = False) -> Dict:
    """Calculate the closeness centrality of all nodes.

    .. note::

        For every node the finite shortest path distances between it and
        all other nodes are summed (entries ``distances[d, x]`` for node
        ``x``) and the centrality is the inverse of that sum. Nodes with a
        sum of zero keep centrality zero. For `normalized=True` the value is
        additionally multiplied by n-1, where n is the number of nodes.
        Distances are taken from `shortest_paths.distance_matrix`.

    Parameters
    ----------
    network : Network

        The :py:class:`Network` object that contains the network

    normalized : bool

        If True the centralities are multiplied by n-1.

    Returns
    -------
    Dict

        Maps node uids to their closeness centrality.

    Examples
    --------
    Compute closeness centrality in a simple network

    >>> import pathpy as pp
    >>> net = pp.Network(directed=False)
    >>> net.add_edge('a', 'x')
    >>> net.add_edge('x', 'b')
    >>> c = pp.algorithms.centralities.closeness_centrality(net)
    >>> c['a']
    0.3333333333333333

    """
    distances = shortest_paths.distance_matrix(network)
    cl: defaultdict = defaultdict(float)

    # translate matrix positions back to node uids
    uid_at = {position: uid for uid, position in network.nodes.index.items()}

    n = network.number_of_nodes()

    # accumulate all finite distances per node
    for d in range(n):
        for x in range(n):
            if d == x:
                continue
            length = distances[d, x]
            if length < np.inf:
                cl[uid_at[x]] += length

    # invert the sums; nodes without any finite distance stay at zero
    for v in network.nodes.uids:
        total = cl[v] + 0.0
        score = 1.0 / total if total > 0.0 else total
        cl[v] = score * (n - 1) if normalized else score

    return cl
def degree_centrality(network: Network, mode: str = 'degree') -> dict:
    """Calculate the degree centrality of all nodes.

    Parameters
    ----------
    network : Network

        The :py:class:`Network` object that contains the network

    mode : str

        Can be chosen as 'degree', 'indegree', or 'outdegree'. Determines
        whether the values of ``network.degrees()``,
        ``network.indegrees()`` or ``network.outdegrees()`` are returned.

    Returns
    -------
    dict

        Maps node uids to the selected degree.

    Raises
    ------
    KeyError

        If `mode` is not one of the three supported values.

    Examples
    --------
    Compute degree centrality in a simple network

    >>> import pathpy as pp
    >>> net = pp.Network(directed=True)
    >>> net.add_edge('a', 'x')
    >>> net.add_edge('x', 'b')
    >>> c = pp.algorithms.centralities.degree_centrality(net)
    >>> c['a']
    1

    >>> c = pp.algorithms.centralities.degree_centrality(net, mode='indegree')
    >>> c['a']
    0

    """
    if mode not in ('degree', 'indegree', 'outdegree'):
        LOG.error('Mode must be \'degree\', \'indegree\' or \'outdegree\'')
        raise KeyError(mode)

    # compute the degree mapping once instead of once per node
    if mode == 'indegree':
        values = network.indegrees()
    elif mode == 'outdegree':
        values = network.outdegrees()
    else:
        values = network.degrees()

    return {v: values[v] for v in network.nodes.keys()}
Beispiel #11
0
def local_clustering_coefficient(network: Network, v: str) -> float:
    """Calculate the local clustering coefficient of a node in a network.

    Let ``k`` be the number of edges (excluding self-loops) whose both end
    points are successors of `v`. For a node with (out-)degree ``d >= 2``
    the coefficient is

        2*k/(d*(d-1))   in undirected networks (d = degree)

        k/(d*(d-1))     in directed networks (d = out-degree)

    Nodes with an (out-)degree smaller than two get the value zero.

    Parameters
    ----------
    network : Network

        The network in which to calculate the local clustering coefficient

    v : str

        Uid of the node for which the local clustering coefficient shall be
        calculated

    Returns
    -------
    float

        Local clustering coefficient of `v`.

    """
    degrees = network.degrees()
    out_degrees = network.outdegrees()

    # pick the degree notion matching the directedness of the network
    if network.directed:
        degree = out_degrees[v]
    else:
        degree = degrees[v]

    if not degree >= 2:
        return 0.

    # count edges that connect two successors of v
    neighbours = network.successors[v]
    links = 0
    for edge in network.edges:
        if edge.v.uid == edge.w.uid:
            continue
        if edge.v in neighbours and edge.w in neighbours:
            links += 1

    possible = degree * (degree - 1)
    if network.directed:
        return links / possible
    return 2 * links / possible
Beispiel #12
0
def Molloy_Reed_randomize(network: Network) -> Network:
    """Generate a random network with the degree sequence of `network`.

    The degree sequence and the node uids of `network` are handed to
    ``Molloy_Reed``; its result is returned unchanged.

    Parameters
    ----------
    network : Network

        Network whose degree sequence and node uids are reused.

    """
    # degrees are listed in order of node indices
    degree_sequence = network.degree_sequence()

    # place every uid at the position given by its node index, so that
    # uids and degrees line up
    index = network.nodes.index
    ordered_uids = ['-'] * len(degree_sequence)
    for uid in network.nodes.uids:
        ordered_uids[index[uid]] = uid

    return Molloy_Reed(degree_sequence, node_uids=ordered_uids)
Beispiel #13
0
def modularity_maximisation(network: Network,
                            iterations: int = 1000) -> Tuple[Dict, float]:
    """Greedy randomised modularity maximisation.

    Starts with every node in its own community. In each iteration two
    communities are drawn at random and merged if the value returned by
    ``_Q_merge`` for the merged mapping exceeds the current value.

    Parameters
    ----------
    network : Network

        The network to cluster.

    iterations : int

        Maximum number of merge attempts. The search stops early once fewer
        than two communities are left.

    Returns
    -------
    Tuple[Dict, float]

        The mapping of node uids to community labels and the value of
        ``_Q_merge`` for that mapping.

    """
    A = network.adjacency_matrix(weighted=False)
    D = network.degrees()
    n = network.number_of_nodes()
    m = network.number_of_edges()

    # start with singleton communities
    C = {}
    community_to_nodes = {}
    for c, v in enumerate(network.nodes.uids):
        C[v] = c
        community_to_nodes[c] = {v}
    q = _Q_merge(network, A, D, n, m, C)

    for _ in tqdm(range(iterations), desc='maximising modularity'):

        # nothing left to merge
        if len(community_to_nodes) < 2:
            break

        # randomly choose two communities; random.sample requires a
        # sequence (dict views raise a TypeError since Python 3.11)
        x, y = random.sample(list(community_to_nodes), 2)

        # check Q of merged communities
        q_new = _Q_merge(network, A, D, n, m, C, merge={x, y})

        if q_new > q:
            # merge community x into community y
            for v in community_to_nodes[x]:
                C[v] = y
            community_to_nodes[y] = community_to_nodes[y].union(
                community_to_nodes.pop(x))
            q = q_new

    return C, q
Beispiel #14
0
def distance_matrix(network: Network,
                    weight: Union[str, bool, None] = None) -> np.ndarray:
    """Calculate shortest path distances between all pairs of nodes.

    .. note::

        Distances are computed by ``csgraph.floyd_warshall`` on the
        adjacency matrix of the network.

    Parameters
    ----------
    network : Network

        The :py:class:`Network` object that contains the network

    weight : Union[str, bool, None]

        Forwarded to ``network.adjacency_matrix``. If it is falsy, the
        distances are computed on the unweighted graph.

    Returns
    -------
    np.ndarray

        The matrix returned by ``csgraph.floyd_warshall``.

    Examples
    --------
    Generate a path and add it to the network.

    >>> import pathpy as pp
    >>> net = pp.Network()
    >>> net.add_edges(('a', 'x'), ('x', 'y'), ('y', 'c'))
    >>> m = pp.algorithms.shortest_paths.distance_matrix(net)
    >>> m[0,3]
    3

    Add shorter path

    >>> net.add_edges(('a', 'x'), ('x', 'c'))
    >>> m = pp.algorithms.shortest_paths.distance_matrix(net)
    >>> m[0,3]
    2
    """
    adjacency = network.adjacency_matrix(weight=weight)

    # without a weight every edge counts as one step
    ignore_weights = not weight

    return csgraph.floyd_warshall(adjacency,
                                  directed=network.directed,
                                  unweighted=ignore_weights,
                                  overwrite=False)
Beispiel #15
0
    def calculate(self, network: Network, paths: PathCollection) -> None:
        """Calculate the null model.

        All paths returned by ``possible_paths`` are cut into windows of
        size ``order - 1``; consecutive windows become nodes joined by an
        edge of the higher-order network. The frequency of a new edge is
        the observed count of the first window multiplied by the transition
        probability (weighted by ``'frequency'``) of the last edge of the
        second window, or zero if the first window was not observed.

        Parameters
        ----------
        network : Network

            First-order network providing the transition matrix.

        paths : PathCollection

            Observed paths.

        """
        # get transition matrix of the underlying network
        probabilities = network.transition_matrix(weight='frequency')

        # generate all possible paths
        candidates = self.possible_paths(paths.edges, self.order)

        # Get all sub-paths of order-1
        observed = SubPathCollection.from_paths(paths,
                                                min_length=self.order - 1,
                                                max_length=self.order - 1,
                                                include_path=True)

        # add paths to the higher-order network
        for candidate in candidates:
            windows: list = list(self.window(candidate, size=self.order - 1))

            for source, target in zip(windows[:-1], windows[1:]):

                # make sure both higher-order nodes exist
                for window in (source, target):
                    if window not in self.nodes:
                        self.nodes.add(window)

                end_points = (self.nodes[source], self.nodes[target])

                # generate the expected frequencies of all possible paths
                if source in observed:
                    last_edge = target[-1]
                    frequency = observed.counter[observed[source]] * \
                        probabilities[network.nodes.index[last_edge.v.uid],
                                      network.nodes.index[last_edge.w.uid]]
                else:
                    frequency = 0.0

                if end_points not in self.edges:
                    self.add_edge(*end_points,
                                  possible=0,
                                  observed=frequency,
                                  frequency=frequency)
Beispiel #16
0
def test_from_network():
    """A second-order null model of a small star has 4 nodes and 4 edges."""
    net = Network()
    for source, target in (('a', 'c'), ('c', 'd'), ('b', 'c'), ('c', 'e')):
        net.add_edge(source, target, frequency=10)

    null = NullModel.from_network(net, order=2)

    assert null.number_of_edges() == 4
    assert null.number_of_nodes() == 4

    # every higher-order edge is expected to carry the same frequency
    for edge in null.edges:
        assert edge['frequency'] == 5.0
Beispiel #17
0
def largest_connected_component(network: Network) -> Network:
    """Return the largest connected component of the network.

    The components are taken from ``find_connected_components``. A copy of
    the network is made and all nodes outside the largest component are
    removed from it. If several components share the maximum size, the
    first one is kept.

    Parameters
    ----------
    network : Network

        The network to reduce; it is not modified.

    Returns
    -------
    Network

        Copy of `network` restricted to the largest component.

    """
    LOG.debug('Computing connected components')
    components = find_connected_components(network)

    # first component of maximal size; empty if there are no components
    largest = max(components.values(), key=len, default={})

    LOG.debug('Copying network')
    lcc = network.copy()

    LOG.debug('Removing nodes outside largest component')
    outside = [uid for uid in lcc.nodes.keys() if uid not in largest]
    for uid in outside:
        lcc.remove_node(uid)
    return lcc
Beispiel #18
0
    def transition_matrix(network: Network,
                          weight: Weight = None,
                          restart_prob: float = 0) -> sp.sparse.csr_matrix:
        """Returns a transition matrix of the random walker.

        Returns a transition matrix that describes a random walk process in
        the given network. For a node ``i`` with positive row sum ``D[i]``
        of the adjacency matrix ``A`` the entries are

            T[i, j] = restart_prob / n + (1 - restart_prob) * A[i, j] / D[i]

        For nodes with non-positive row sum the row is uniform (``1 / n``)
        if ``restart_prob > 0`` and zero otherwise; a warning is logged once
        if such nodes exist.

        Parameters
        ----------
        network: Network

            The network for which the transition matrix will be created.

        weight: Weight

            Forwarded to ``adjacency_matrix``.

        restart_prob: float

            Probability mass that is spread uniformly over all nodes in
            every step.

        Returns
        -------
        sp.sparse.csr_matrix

            The n x n transition matrix.

        """
        A = adjacency_matrix(network, weight=weight)
        n = network.number_of_nodes()
        if n == 0:
            return sp.sparse.csr_matrix((n, n))

        # row sums as flat array (sparse matrices return an n x 1 matrix)
        D = np.asarray(A.sum(axis=1)).ravel()

        # Build the matrix densely and convert once at the end: with a
        # restart probability every entry is non-zero anyway, and assigning
        # single entries of a CSR matrix is very slow.
        adjacency = sp.sparse.csr_matrix(A).toarray()
        has_out = D > 0

        T = np.zeros((n, n))
        T[has_out] = restart_prob * (1. / n) + (
            1 - restart_prob) * adjacency[has_out] / D[has_out][:, np.newaxis]

        if not has_out.all():
            LOG.warning(
                'Computing transition matrix for node with zero out-degree'
            )
            if restart_prob > 0:
                T[~has_out] = 1. / n

        return sp.sparse.csr_matrix(T)
Beispiel #19
0
def lattice_network(start: int = 0, stop: int = 10, dims: int = 2):
    """Generate an n-dimensional lattice network.

    The coordinates in each dimension range from start (inclusive) to stop
    (exclusive). Every node carries its coordinates in the attribute
    ``pos`` and is named by its coordinates joined with ``'-'``. Two nodes
    are connected if their coordinates differ by exactly one in total.

    Parameters
    ----------
    start : int

        First coordinate value in every dimension.

    stop : int

        End of the coordinate range (exclusive).

    dims : int

        Number of dimensions.

    Returns
    -------
    Network

        The undirected lattice network.

    """
    network = Network(directed=False)

    for pos in _multi_dim_range(start, stop, dims):
        # join the coordinates with '-'; stripping a trailing separator
        # instead would also strip the sign of a negative first coordinate
        # and make e.g. (-1, 0) and (1, 0) share the same uid
        uid = '-'.join(str(i) for i in pos)
        network.add_node(Node(uid, pos=np.array(pos)))

    for v in network.nodes:
        for w in network.nodes:
            # neighbours on the lattice have Manhattan distance one
            if np.sum(np.abs(v['pos'] - w['pos'])) == 1 and (
                    v.uid, w.uid) not in network.edges:
                network.add_edge(v, w)
    return network
Beispiel #20
0
def check_tree(network: Network):
    """Check whether a directed network is a tree.

    A directed network is accepted as a tree if exactly one node has
    in-degree zero (the root), no node is reached twice when following the
    successors from the root, and every node of the network is reached.

    Parameters
    ----------
    network : Network

        The network to check.

    Returns
    -------
    bool

        True if the network is a tree. For undirected networks an error is
        logged and False is returned.

    """
    if not network.directed:
        LOG.error('Tree checking not supported for undirected networks')
        return False

    uids = list(network.nodes.uids)
    # compute the in-degrees once instead of once per node
    indegrees = network.indegrees()

    # identify node with zero indegree
    root = None
    for v in uids:
        if indegrees[v] == 0:
            if root is not None:
                # two nodes with in-degree zero -> no tree
                return False
            root = v
    if root is None:  # no node with indegree zero -> no tree
        return False

    # iterative depth-first search (no recursion limit for deep trees)
    visited = {root}
    stack = [root]
    while stack:
        node = stack.pop()
        for successor in network.successors[node]:
            if successor.uid in visited:
                # node reachable via two different edges -> no tree
                return False
            visited.add(successor.uid)
            stack.append(successor.uid)

    # nodes not reachable from the root (e.g. a separate cycle, in which
    # no node has in-degree zero) mean the network is no tree
    return len(visited) == len(uids)
Beispiel #21
0
def find_connected_components(network: Network) -> Dict:
    """Computes connected components of a network.

    The components are found with a recursive implementation of Tarjan's
    algorithm that follows ``network.successors``.

    Parameters
    ----------

    network: Network

        Network instance

    Returns
    -------

    dict

        dictionary mapping consecutive integer component IDs to the set of
        node uids in that component. Empty if the network has no nodes or
        no edges.

    """
    if network.number_of_nodes() == 0 or network.number_of_edges() == 0:
        return dict()

    # state shared with the nested function
    counter: int = 0
    stack: list = []
    indices: dict = {}
    low_link: dict = {}
    on_stack: set = set()
    components: dict = {}

    def tarjan(v: str):
        """Tarjan's algorithm"""
        nonlocal counter

        indices[v] = low_link[v] = counter
        counter += 1
        stack.append(v)
        on_stack.add(v)

        for node in network.successors[v]:
            w = node.uid
            if w not in indices:
                # successor not visited yet: descend
                tarjan(w)
                low_link[v] = min(low_link[v], low_link[w])
            elif w in on_stack:
                # successor belongs to the component under construction
                low_link[v] = min(low_link[v], indices[w])

        # v is the root of a component: pop its members off the stack
        if low_link[v] == indices[v]:
            members: set = set()
            while True:
                w = stack.pop()
                on_stack.discard(w)
                members.add(w)
                if w == v:
                    break
            components[v] = members

    # compute strongly connected components
    LOG.debug('Computing connected components')
    for v in tqdm(network.nodes.keys(), desc='component calculation'):
        if v not in indices:
            tarjan(v)

    LOG.debug('Mapping component sizes')
    return dict(enumerate(components.values()))
def all_shortest_paths(network: Network,
                       weight: Union[str, bool, None] = None,
                       return_distance_matrix: bool = True) -> Union[defaultdict, Tuple[defaultdict, np.ndarray]]:
    """Calculates shortest paths between all pairs of nodes.

    .. note::

        Shortest paths are calculated using a custom implementation of
        the Floyd-Warshall algorithm.

    Parameters
    ----------
    network : Network

        The :py:class:`Network` object that contains the network

    weight : Union[str, bool, None]

        ``None`` or ``False``: every edge has cost 1. ``True``: the edge
        attribute ``'weight'`` is used as cost. Any other value is used as
        the name of the edge attribute holding the cost.

    return_distance_matrix : bool

        If True (default) the distance matrix is returned as well.

    Returns
    -------
    defaultdict or Tuple[defaultdict, np.ndarray]

        ``paths[v][w]`` is the set of all shortest paths from v to w, each
        given as a tuple of node uids. If `return_distance_matrix` is True
        a tuple ``(paths, distances)`` is returned, where ``distances`` is
        addressed by node index.

    Examples
    --------
    Generate a path and add it to the network.

    >>> import pathpy as pp
    >>> net = pp.Network()
    >>> net.add_edges(('a', 'x'), ('x', 'c'))
    >>> paths = pp.algorithms.shortest_paths.all_shortest_paths(
    ...     net, return_distance_matrix=False)
    >>> paths['a']['c']
    {('a', 'x', 'c')}

    """
    dist: defaultdict = defaultdict(lambda: defaultdict(lambda: np.inf))
    s_p: defaultdict = defaultdict(lambda: defaultdict(set))

    def edge_cost(edge):
        """Cost of a single edge according to `weight`."""
        if weight == True:
            return edge.attributes['weight']
        if weight != False and weight != None:
            return edge.attributes[weight]
        return 1

    def join_paths(v, k, w):
        """Add all paths v -> k -> w to the shortest paths from v to w."""
        # iterate over copies since the sets may change while being joined
        for head in list(s_p[v][k]):
            for tail in list(s_p[k][w]):
                s_p[v][w].add(head + tail[1:])

    # every edge is a path of length one
    for e in network.edges:
        cost = edge_cost(e)
        source, target = e.v.uid, e.w.uid

        dist[source][target] = cost
        s_p[source][target].add((source, target))
        if not network.directed:
            dist[target][source] = cost
            s_p[target][source].add((target, source))

    for k in tqdm(network.nodes.keys(), desc='calculating shortest paths between all nodes'):
        for v in network.nodes.keys():
            for w in network.nodes.keys():
                if v == w:
                    continue
                current = dist[v][w]
                via_k = dist[v][k] + dist[k][w]
                if current > via_k:
                    # we have found a shorter path
                    dist[v][w] = via_k
                    s_p[v][w] = set()
                    join_paths(v, k, w)
                elif current == via_k:
                    # we have found another shortest path
                    join_paths(v, k, w)

    # every node reaches itself with distance zero
    for v in network.nodes.keys():
        dist[v][v] = 0
        s_p[v][v].add((v,))

    if not return_distance_matrix:
        return s_p

    size = network.number_of_nodes()
    dist_arr = np.ndarray(shape=(size, size))
    for v in network.nodes:
        for w in network.nodes:
            dist_arr[network.nodes.index[v.uid],
                     network.nodes.index[w.uid]] = dist[v.uid][w.uid]
    return s_p, dist_arr
Beispiel #23
0
def from_dataframe(df: pd.DataFrame,
                   directed: bool = True,
                   loops: bool = True,
                   multiedges: bool = False,
                   **kwargs: Any) -> Network:
    """Reads a network from a pandas dataframe.

    By default, columns `v` and `w` will be used as source and target of
    edges. If no column 'v' or 'w' exists, the list of synonyms for `v` and
    `w` in the config file will be used to remap columns, choosing the first
    matching entry. The remapping is done on a renamed copy; the data frame
    passed in is not modified.

    In addition, an optional column `uid` will be used to assign edge uids.
    If this column is not present, default edge uids will be created. Any
    other columns (e.g. weight, type, time, etc.) will be assigned as edge
    attributes. kwargs will be assigned as network attributes.

    Parameters
    ----------

    df: pd.DataFrame

        Edge list with one row per edge.

    directed: bool

        Whether to generate a directed or undirected network.

    loops: bool

        If False, edges whose source and target coincide are not added.

    multiedges: bool

        Forwarded to the ``Network`` constructor.

    **kwargs: Any

        List of key-value pairs that will be assigned as network attributes

    Raises
    ------
    IOError

        If a row has no value for `v` or `w`.

    """

    # if no v/w columns are included, pick first synonym
    if 'v' not in df.columns:
        LOG.info('No column v, searching for synonyms')
        for col in df.columns:
            if col in config['edge']['v_synonyms']:
                LOG.info('Remapping column \'%s\' to \'v\'', col)
                df = df.rename(columns={col: "v"})
                # stop at the first match; renaming further synonyms would
                # create several columns called 'v'
                break

    if 'w' not in df.columns:
        LOG.info('No column w, searching for synonyms')
        for col in df.columns:
            if col in config['edge']['w_synonyms']:
                LOG.info('Remapping column \'%s\' to \'w\'', col)
                df = df.rename(columns={col: "w"})
                break

    LOG.debug('Creating %s network',
              'directed' if directed else 'undirected')

    reserved_columns = {'v', 'w', 'uid'}

    net = Network(directed=directed, multiedges=multiedges, **kwargs)
    for row in df.to_dict(orient='records'):

        # get edge
        v = row.get('v', None)
        w = row.get('w', None)
        uid = row.get('uid', None)
        if v is None or w is None:
            LOG.error('DataFrame minimally needs columns \'v\' and \'w\'')
            raise IOError('DataFrame minimally needs columns \'v\' and \'w\'')

        v = str(v)
        w = str(w)
        if v not in net.nodes.uids:
            net.add_node(v)
        if w not in net.nodes.uids:
            net.add_node(w)
        if uid is None:
            edge = Edge(net.nodes[v], net.nodes[w])
        else:
            edge = Edge(net.nodes[v], net.nodes[w], uid=uid)
        if loops or edge.v != edge.w:
            net.add_edge(edge)

        # all remaining columns become edge attributes
        for k in row:
            if k not in reserved_columns:
                edge[k] = row[k]
    return net
Beispiel #24
0
_GRAPHML_NS = '{http://graphml.graphdrawing.org/xmlns}'


def _graphml_bool(text: str) -> bool:
    """Parse an xs:boolean literal; ``bool('false')`` would wrongly be True."""
    return text.strip().lower() in ('true', '1')


# maps the GraphML ``attr.type`` to a converter; unknown types fall back to str
_GRAPHML_CONVERTERS = {
    'string': str,
    'float': float,
    'double': float,
    'int': int,
    'long': int,
    'boolean': _graphml_bool,
}


def _graphml_value(convert, text):
    """Convert element text with ``convert``; an empty element yields None."""
    return None if text is None else convert(text)


def _graphml_set_attributes(element, target, declared, undeclared_msg) -> None:
    """Copy the <data> children of ``element`` into ``target.attributes``."""
    for data in element.findall(_GRAPHML_NS + 'data'):
        key = data.attrib['key']
        if key in declared:
            info = declared[key]
            # store under the declared attribute name, not the key id
            target.attributes[info['name']] = _graphml_value(
                info['type'], data.text)
        else:
            LOG.warning(undeclared_msg, key)
            target.attributes[key] = data.text

    # fill in declared defaults for attributes that were not set explicitly
    for info in declared.values():
        if 'default' in info and target.attributes[info['name']] is None:
            target.attributes[info['name']] = info['default']


def read_graphml(filename: str):
    """Reads a pathpy.Network from a graphml file.

    Typed Node and Edge attributes including default values are supported.
    ``<key>`` declarations are indexed by their ``id`` (the value a ``<data>``
    element refers to in its ``key`` attribute) and the converted value is
    stored under the declared ``attr.name``. Boolean values are parsed from
    their textual form ('true'/'false'/'1'/'0').

    A warning is issued for every ``<data>`` element whose key is undeclared,
    in which case the value is stored as a string under the raw key.

    Parameters
    ----------

    filename: str
        The graphml file to read the graph from

    Returns
    -------
    Network
        The network with all nodes, edges and attributes found in the file.

    Raises
    ------
    IOError
        If the file does not contain a ``<graph>`` element.

    """
    root = ET.parse(filename).getroot()

    graph = root.find(_GRAPHML_NS + 'graph')
    if graph is None:
        LOG.error('GraphML file contains no graph element')
        raise IOError('GraphML file contains no graph element')

    directed = graph.attrib['edgedefault'] != 'undirected'
    # the id of <graph> is optional in GraphML; only pass a uid if present
    if 'id' in graph.attrib:
        n = Network(directed=directed, uid=graph.attrib['id'])
    else:
        n = Network(directed=directed)

    node_attributes = {}
    edge_attributes = {}

    # read attribute declarations (name, type, default), indexed by key id
    for key in root.findall(_GRAPHML_NS + 'key'):
        key_id = key.attrib['id']
        a_data = {
            'name': key.attrib.get('attr.name', key_id),
            'type': _GRAPHML_CONVERTERS.get(key.attrib.get('attr.type'), str),
        }

        default = key.find(_GRAPHML_NS + 'default')
        if default is not None:
            a_data['default'] = _graphml_value(a_data['type'], default.text)

        # a key without ``for`` (or with for="all") applies to nodes and edges
        domain = key.attrib.get('for', 'all')
        if domain in ('node', 'all'):
            node_attributes[key_id] = a_data
        if domain in ('edge', 'all'):
            edge_attributes[key_id] = a_data

    # add nodes with uids and attributes
    for node in graph.findall(_GRAPHML_NS + 'node'):
        v = Node(uid=node.attrib['id'])
        _graphml_set_attributes(
            node, v, node_attributes,
            'Undeclared Node attribute "%s". Defaulting to string type.')
        n.add_node(v)

    # add edges with uids and attributes
    for edge in graph.findall(_GRAPHML_NS + 'edge'):
        source = edge.attrib['source']
        target = edge.attrib['target']
        # edge ids are optional in GraphML; omit the uid if none is given
        edge_uid = edge.attrib.get('id')
        if edge_uid is None:
            e = Edge(n.nodes[source], n.nodes[target])
        else:
            e = Edge(n.nodes[source], n.nodes[target], uid=edge_uid)
        _graphml_set_attributes(
            edge, e, edge_attributes,
            'Warning: Undeclared Edge attribute "%s". '
            'Defaulting to string type.')
        n.add_edge(e)
    return n
Beispiel #25
0
def is_connected(network: Network) -> bool:
    """Return whether the network is (strongly) connected.

    The network counts as connected when the size reported by
    ``largest_component_size`` equals the number of nodes in the network.
    """
    biggest_component = largest_component_size(network)
    node_count = network.number_of_nodes()
    return biggest_component == node_count
Beispiel #26
0
def ER_np(n: int,
          p: float,
          directed: bool = False,
          loops: bool = False,
          node_uids: Optional[list] = None) -> Network:
    """G(n, p) random graph following the Erdös-Renyi model.

    Creates a network with n nodes in which every admissible node pair is
    connected independently with probability p.

    Parameters
    ----------
    n : int

        Number of nodes of the generated network

    p : float

        Probability with which each candidate edge is created

    directed : bool

        If True, every ordered pair of nodes is a candidate edge and a
        directed network is generated; otherwise each unordered pair is
        considered once.

    loops : bool

        Whether self-loops are candidate edges as well

    node_uids : list

        Optional list of node uids. If it is omitted or its length differs
        from n, the uids '0', ..., str(n - 1) are generated instead.

    Examples
    --------
    Generate random undirected network with 10 nodes

    >>> import pathpy as pp
    >>> random_graph = pp.algorithms.random_graphs.ER_np(n=10, p=0.03)
    >>> print(random_graph.summary())
    ...

    """
    network = Network(directed=directed)

    uids_usable = node_uids is not None and len(node_uids) == n
    if not uids_usable:
        LOG.info('No valid node uids given, generating numeric node uids')
        node_uids = [str(idx) for idx in range(n)]

    for uid in node_uids:
        network.add_node(uid)

    for src in tqdm(range(n), 'generating G(n,p) network'):
        # directed: all targets; undirected: only targets up to src itself
        upper = n if directed else src + 1
        for tgt in range(upper):
            # the random draw only happens for admissible pairs
            if (loops or tgt != src) and np.random.random_sample() < p:
                network.add_edge(node_uids[src], node_uids[tgt])
    return network
Beispiel #27
0
def Watts_Strogatz(n: int,
                   s: int,
                   p: float = 0.0,
                   loops: bool = False,
                   node_uids: Optional[list] = None) -> Network:
    """Undirected Watts-Strogatz lattice network

    Generates an undirected Watts-Strogatz lattice network with lattice
    dimensionality one.

    Parameters
    ----------
    n : int

        The number of nodes in the generated network

    s : int

        The number of nearest neighbors that will be connected
        in the ring lattice

    p : float

        The rewiring probability

    loops : bool

        Whether self-loops are allowed, both in the initial lattice (the
        neighbor offsets then start at 0 instead of 1) and as rewiring targets.

    node_uids : list

        Optional list of node uids that will be used. If it is omitted or its
        length differs from n, the uids '0', ..., str(n - 1) are generated.

    Examples
    --------
    Generate a Watts-Strogatz network with 100 nodes

    >>> import pathpy as pp
    >>> small_world = pp.algorithms.random_graphs.Watts_Strogatz(n=100, s=2, p=0.1)
    >>> print(small_world.summary())
    ...

    """
    network = Network(directed=False)
    if node_uids is None or len(node_uids) != n:
        LOG.info('No valid node uids given, generating numeric node uids')
        node_uids = []
        for i in range(n):
            network.add_node(Node(str(i)))
            node_uids.append(str(i))
    else:
        for i in range(n):
            network.add_node(node_uids[i])

    # construct a ring lattice (dimension 1); the neighbor offsets do not
    # depend on the node, so they are computed once
    first, last = (0, s) if loops else (1, s + 1)
    for i in range(n):
        for j in range(first, last):
            v = network.nodes[node_uids[i]]
            w = network.nodes[node_uids[(i + j) % n]]
            if (v.uid, w.uid) not in network.edges:
                network.add_edge(v, w)

    if p == 0:
        # nothing to do here
        return network

    # Rewire each link with probability p. Iterate over a snapshot of the
    # edges because the edge set is modified inside the loop.
    for edge in tqdm(list(network.edges.values()), 'generating WS network'):
        if np.random.rand() < p:
            # Delete original link and remember source node
            v = edge.v.uid
            network.remove_edge(edge)

            # Find new random tgt, which is not yet connected to src
            new_target = None

            # This loop repeatedly chooses a random target until we find
            # a target not yet connected to src. Note that this could potentially
            # result in an infinite loop depending on parameters.
            while new_target is None:
                # draw from the actual node uids: str(index) is only a valid
                # uid when the numeric default uids are in use
                candidate = node_uids[np.random.randint(n)]
                if ((candidate != v or loops)
                        and (v, candidate) not in network.edges):
                    new_target = candidate
            network.add_edge(v, new_target)
    return network
Beispiel #28
0
def ER_nm(n: int,
          m: int,
          directed: bool = False,
          loops: bool = False,
          multiedges: bool = False,
          node_uids: Optional[list] = None) -> Union[Network, None]:
    """(n, m) Erdös-Renyi model.

    Generates a random graph with a fixed number of n nodes and m edges based on
    the Erdös-Renyi model.

    Parameters
    ----------
    n : int

        The number of nodes in the generated network

    m : int

        The number of randomly generated edges in the network

    directed : bool

        Whether a directed network should be generated

    loops : bool

        Whether or not the generated network may contain
        loops.

    multiedges : bool

        Whether or not the same edge can be added multiple times

    node_uids : list

        Optional list of node uids that will be used. If it is omitted or its
        length differs from n, the uids '0', ..., str(n - 1) are generated.

    Returns
    -------
    Network or None

        The generated network, or None (after logging an error) if m exceeds
        the maximum number of edges possible for the requested network type.

    Examples
    --------
    Generate random undirected network with 10 nodes and 25 edges

    >>> import pathpy as pp
    >>> random_graph = pp.algorithms.random_graphs.ER_nm(n=10, m=25)
    >>> print(random_graph.summary())
    ...

    """
    # Check parameter sanity
    M = max_edges(n, directed=directed, loops=loops, multiedges=multiedges)
    if m > M:
        LOG.error('Given network type with n nodes can have at most %s edges.',
                  M)
        return None

    # the network itself must permit multi-edges, otherwise the repeated
    # edges accepted below target a network that is not set up for them
    network = Network(directed=directed, multiedges=multiedges)

    if node_uids is None or len(node_uids) != n:
        LOG.info('No valid node uids given, generating numeric node uids')
        node_uids = [str(i) for i in range(n)]

    for uid in node_uids:
        network.add_node(uid)

    edges = 0
    while edges < m:
        # sample positions rather than the uids themselves, so the uids are
        # not coerced into NumPy scalar types; without loops the two
        # positions are drawn without replacement and hence differ
        i, j = np.random.choice(n, size=2, replace=loops)
        v, w = node_uids[i], node_uids[j]
        if multiedges or network.nodes[w] not in network.successors[v]:
            network.add_edge(v, w)
            edges += 1
    return network
Beispiel #29
0
def Molloy_Reed(degrees: Union[np.array, Dict[str, float]],
                multiedge: bool = False,
                relax: bool = False,
                node_uids: Optional[list] = None) -> Network:
    """Generate Molloy-Reed graph.

    Generates a random undirected network with given degree sequence based on
    the Molloy-Reed algorithm.

    .. note::

        The condition proposed by Erdös and Gallai (1967) is used to test
        whether the degree sequence is graphic, i.e. whether a network with the
        given degree sequence exists.

    Parameters
    ----------
    degrees : list

        List of integer node degrees. The number of nodes of the generated
        network corresponds to len(degrees).

    multiedge : bool

        Whether the same pair of nodes may be connected by more than one
        edge. The network is created with the matching multiedges setting.

    relax : bool

        If True, we conceptually allow self-loops and multi-edges, but do not
        add them to the network (multi-edges are still added if multiedge is
        True). This implies that the generated network may not have exactly
        sum(degrees)/2 links, but it ensures that the algorithm always
        finishes.

    node_uids : list

        Optional list of node uids that will be used. If it is omitted or its
        length differs from len(degrees), numeric uids are generated.

    Returns
    -------
    Network or None

        The generated network, or None if the degree sequence is not graphic.

    Examples
    --------
    Generate random undirected network with given degree sequence

    >>> import pathpy as pp
    >>> random_network = pp.algorithms.random_graphs.Molloy_Reed([2,2,2])
    >>> print(random_network.summary())
    ...

    Network generation fails for non-graphic sequences

    >>> import pathpy as pp
    >>> random_network = pp.algorithms.random_graphs.Molloy_Reed([1,0])
    >>> print(random_network)
    None

    """

    # only graphic degree sequences can be realised
    if not is_graphic_Erdos_Gallai(degrees):
        return None

    # create empty network with n nodes
    n = len(degrees)
    network = Network(directed=False, multiedges=multiedge)

    if node_uids is None or len(node_uids) != n:
        LOG.info('No valid node uids given, generating numeric node uids')
        node_uids = [str(i) for i in range(n)]

    for uid in node_uids:
        network.add_node(uid)

    # generate link stubs based on degree sequence
    stubs = []
    for i in range(n):
        stubs.extend([str(node_uids[i])] * int(degrees[i]))

    # connect randomly chosen pairs of link stubs
    while stubs:
        v, w = np.random.choice(stubs, 2, replace=False)

        already_linked = network.nodes[w] in network.successors[v]
        # a pair is inadmissible if it forms a self-loop, or repeats an
        # existing edge while multi-edges are not allowed
        inadmissible = v == w or (not multiedge and already_linked)

        if inadmissible and not relax:
            # reject the pair: remove a random edge and give back its stubs
            if network.number_of_edges() > 0:
                edge = np.random.choice(list(network.edges))
                stubs.append(edge.v.uid)
                stubs.append(edge.w.uid)
                network.remove_edge(edge)
        else:
            # With relax, inadmissible pairs reach this branch too: their
            # stubs are consumed without adding an edge, so every iteration
            # makes progress. Repeated edges are added only if multiedge
            # is set, self-loops never.
            if v != w and (multiedge or not already_linked):
                network.add_edge(v, w)
            stubs.remove(v)
            stubs.remove(w)

    return network