Example #1
import networkx as nx
import numpy
from math import sqrt

def kats_in_query_set(G, Q=None, alpha=None):
    # here the same idea as eigenvector_centrality is used, adjusted with the
    # requirements for Katz centrality on a query set Q
    max_iter = 100
    threshold = 1.0e-6
    nnodes = len(G.nodes)
    if Q is None:
        Q = list(G.nodes)
    if alpha is None:
        # alpha is set to the inverse of the largest eigenvalue of the
        # normalized Laplacian of G
        L = nx.normalized_laplacian_matrix(G)
        e = numpy.linalg.eigvals(L.toarray())
        alpha = 1.0 / max(e.real)  # the eigenvalues are real up to rounding
    x = dict([(n, 1.0) for n in G])
    for i in range(max_iter):
        xlast = x    # save the x(t-1) vector
        x = dict.fromkeys(xlast, 0)
        # do the multiplication x^T = (alpha x^T A_Q) + 1
        for n in x:  # n is each node
            for nbr in G[n]:  # nbr iterates over the neighbors of n
                Aij = 0
                if nbr in Q or n in Q:
                    # use the weight only if the edge (n, nbr) touches Q
                    Aij = G[n][nbr].get("weight", 1)
                x[nbr] += alpha * xlast[n] * Aij
        # add the constant +1 term once per node, as in x^T = alpha x^T A_Q + 1
        for n in x:
            x[n] += 1
        # normalize vector
        try:
            s = 1.0/sqrt(sum( v**2 for v in x.values() ))
        # this should never be zero?
        except ZeroDivisionError:
            s = 1.0
        for n in x:
            x[n] *= s
        # check convergence
        err = sum([abs(x[n]-xlast[n]) for n in x])
        if err < nnodes*threshold:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
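
A minimal usage sketch (not part of the original snippet; the graph, query set, and alpha are chosen for illustration):

G = nx.path_graph(6)
# only edges touching the query set Q contribute to the scores
scores = kats_in_query_set(G, Q=[2, 3], alpha=0.1)
print(sorted(scores, key=scores.get, reverse=True))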
Example #2
import networkx as nx

def pagerank(G,
             alpha=0.85,
             personalization=None,
             max_iter=100,
             tol=1.0e-6,
             nstart=None,
             weight='weight',
             dangling=None):
    if len(G) == 0:
        return {}

    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G

    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    # Choose fixed starting vector if not given
    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)
    else:
        # Normalized nstart vector
        s = float(sum(nstart.values()))
        x = dict((k, v / s) for k, v in nstart.items())

    if personalization is None:
        # Assign uniform personalization vector if not given
        p = dict.fromkeys(W, 1.0 / N)
    else:
        s = float(sum(personalization.values()))
        p = dict((k, v / s) for k, v in personalization.items())

    if dangling is None:
        # Use personalization vector if dangling vector not specified
        dangling_weights = p
    else:
        s = float(sum(dangling.values()))
        dangling_weights = dict((k, v / s) for k, v in dangling.items())
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T=xlast^T*W
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights.get(
                n, 0) + (1.0 - alpha) * p.get(n, 0)
        # check convergence, l1 norm
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
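
A short usage check (assuming the pagerank function above is in scope); this mirrors the built-in nx.pagerank:

G = nx.DiGraph(nx.path_graph(4))
pr = pagerank(G, alpha=0.9)
print(pr)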
Example #3
import networkx as nx

def pagerank_edgetypes_indirect(D, edgetype_scale, indirect_nodes, max_iter=100, tol=1.0e-6, weight='weight'):
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    direct_nodes = [a for a in W if a not in indirect_nodes]
    x = dict.fromkeys(direct_nodes, 1.0 / len(direct_nodes))
    p = dict.fromkeys(direct_nodes, 1.0 / len(direct_nodes))

    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0.0)
        weight_to_distribute = sum([(xlast[n] * W[n][nbr][weight] * edgetype_scale[W[n][nbr]['type']]) for n in x for nbr in W[n]])
        undistributed_weight = 1 - weight_to_distribute
        for n in x:
            for nbr in W[n]:
                if nbr in indirect_nodes:
                    contribution = xlast[n] * W[n][nbr][weight] * edgetype_scale[W[n][nbr]['type']] / len(W[nbr])
                    for nbr_adj in W[nbr]:
                        x[nbr_adj] += contribution
                else:
                    x[nbr] += xlast[n] * W[n][nbr][weight] * edgetype_scale[W[n][nbr]['type']]
            x[n] += undistributed_weight * p.get(n, 0)

        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
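
A hypothetical usage sketch: the node names, edge types, and scale factors below are invented. The "author" node is marked indirect, so rank flowing into it is passed straight through to its out-neighbors instead of being ranked itself.

D = nx.DiGraph()
D.add_edge("p1", "author", weight=1.0, type="written_by")
D.add_edge("author", "p2", weight=1.0, type="wrote")
D.add_edge("p2", "p1", weight=1.0, type="cites")
scale = {"written_by": 0.5, "wrote": 0.5, "cites": 1.0}
ranks = pagerank_edgetypes_indirect(D, scale, indirect_nodes=["author"])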
Example #4
import networkx as nx
import numpy as np
from sklearn.preprocessing import normalize

def salsa_algorithm(graph, max_iter=1000, tolerance=1.0e-7, normalized=True):
    """ We used networkx's Hits algorithm. We changed variables' names and made the appropriate modifications"""
    if len(graph) == 0:
        raise ValueError("Graph has zero nodes")

    adjacency_matrix = nx.to_scipy_sparse_matrix(graph, nodelist=list(graph))  # A from slides
    (number_rows, number_cols) = adjacency_matrix.shape  # should be square
    adjacency_matrix_c = normalize(adjacency_matrix, norm='l1', axis=0)  # Normalize adjacency_matrix (columns)
    adjacency_matrix_r = normalize(adjacency_matrix, norm='l1', axis=1)  # Normalize adjacency_matrix (rows)
    authority_matrix = adjacency_matrix_r.T * adjacency_matrix_c  # authority matrix - ArT * Ac from slides
    a = np.ones((number_rows, 1)) / number_rows  # initial guess

    # power iteration on authority matrix
    iteration_index = 0
    while True:
        last_a = a
        a = authority_matrix * a
        a = a / a.max()
        # check convergence, l1 norm
        err = np.absolute(a - last_a).sum()
        if err < tolerance:
            break
        if iteration_index > max_iter:
            raise nx.PowerIterationFailedConvergence(max_iter)
        iteration_index += 1
    a = np.asarray(a).flatten()
    h = np.asarray(adjacency_matrix_c * a).flatten()  # h=adjacency_matrix_c*a
    if normalized:
        h = h / h.sum()
        a = a / a.sum()
    hubs = dict(zip(graph, map(float, h)))
    authorities = dict(zip(graph, map(float, a)))
    return hubs, authorities
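
A short usage example on a small undirected graph (assuming the imports above):

G = nx.path_graph(4)
hubs, authorities = salsa_algorithm(G)
print(hubs)
print(authorities)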
Example #5
import networkx as nx
from math import sqrt

def eigenvector_in_query_set(G, Q=None):
    # part of this code is from https://networkx.github.io/documentation/stable/_modules/networkx/algorithms/centrality/eigenvector.html
    # reshaped to fit this purpose with a query set Q
    max_iter = 100
    threshold = 1.0e-6
    if Q is None:
        Q = list(G.nodes)
    # x is the starting vector
    x = dict([(n, 1.0 / len(G)) for n in G])
    # normalize starting vector
    s = 1.0 / sum(x.values())
    for k in x:
        x[k] *= s
    nnodes = len(G.nodes)
    for i in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast, 0)
        # do the multiplication y^T = x^T AQ
        for n in x:  # n is each node
            for nbr in G[n]:  # nbr iterates over the neighbors of n
                Aij = 0
                if nbr in Q or n in Q:
                    # use the weight only if the edge (n, nbr) touches Q
                    Aij = G[n][nbr].get("weight", 1)
                x[nbr] += xlast[n] * Aij
        # normalize vector
        try:
            s = 1.0/sqrt(sum(v**2 for v in x.values()))
        # this should never be zero?
        except ZeroDivisionError:
            s = 1.0
        for n in x:
            x[n] *= s
        # check convergence
        err = sum([abs(x[n]-xlast[n]) for n in x])
        if err < nnodes*threshold:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
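
A minimal usage sketch; a graph containing a triangle is used because plain power iteration need not converge on bipartite graphs:

G = nx.Graph([(0, 1), (1, 2), (2, 0), (2, 3), (3, 4)])
scores = eigenvector_in_query_set(G, Q=[0, 1, 2])
print(scores)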
Example #6
import networkx as nx

def my_hits_alg(graph, max_iter=1000, tol=1.0e-6, nstart=None):
    # check if the graph is empty
    if len(graph) == 0:
        return {}, {}

    # if no starting node is given, choose one
    if nstart is None:
        hub_scores = dict.fromkeys(graph, 1.0 / graph.number_of_nodes())
    else:
        hub_scores = nstart

        # normalize starting vector
        normalizer = 1.0 / sum(hub_scores.values())
        for k in hub_scores:
            hub_scores[k] *= normalizer

    for _ in range(
            max_iter):  # power iteration: make up to max_iter iterations
        hlast = hub_scores
        hub_scores = dict.fromkeys(hlast.keys(), 0)
        authority_scores = dict.fromkeys(hlast.keys(), 0)

        # matrix multiplication to calculate scores
        for n in hub_scores:
            for m in graph[n]:
                authority_scores[m] += hlast[n] * graph[n][m].get('weight', 1)
        for n in hub_scores:
            for m in graph[n]:
                hub_scores[n] += authority_scores[m] * graph[n][m].get(
                    'weight', 1)

        # normalize scores
        normalizer = 1.0 / max(hub_scores.values())
        for n in hub_scores:
            hub_scores[n] *= normalizer
        normalizer = 1.0 / max(authority_scores.values())
        for n in authority_scores:
            authority_scores[n] *= normalizer

        # check convergence, l1 norm
        err = sum([abs(hub_scores[n] - hlast[n]) for n in hub_scores])
        if err < tol:
            print('Iterations: ', _)
            break

    # if the algorithm fails to converge within the max number of iterations
    else:
        raise nx.PowerIterationFailedConvergence(max_iter)

    # normalize scores
    normalizer = 1.0 / sum(authority_scores.values())
    for n in authority_scores:
        authority_scores[n] *= normalizer
    normalizer = 1.0 / sum(hub_scores.values())
    for n in hub_scores:
        hub_scores[n] *= normalizer

    return hub_scores, authority_scores
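
A short usage example (assuming my_hits_alg is in scope):

G = nx.DiGraph(nx.path_graph(4))
hubs, authorities = my_hits_alg(G)
print(hubs)
print(authorities)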
Example #7
import networkx as nx
from math import sqrt

def katz_centrality(
    G,
    alpha=0.1,
    beta=1.0,
    max_iter=1000,
    tol=1.0e-6,
    nstart=None,
    normalized=True,
    weight=None,
):
    if len(G) == 0:
        return {}

    nnodes = G.number_of_nodes()

    if nstart is None:
        # choose starting vector with entries of 0
        x = {n: 0 for n in G}
    else:
        x = nstart

    try:
        b = dict.fromkeys(G, float(beta))
    except (TypeError, ValueError, AttributeError) as e:
        b = beta
        if set(beta) != set(G):
            raise nx.NetworkXError("beta dictionary "
                                   "must have a value for every node") from e


    # make up to max_iter iterations
    for i in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast, 0)
        # do the multiplication y^T = alpha * x^T A + beta
        for n in x:
            for nbr in G[n]:
                x[nbr] += xlast[n] * G[n][nbr].get(weight, 1)
        for n in x:
            x[n] = alpha * x[n] + b[n]

        # check convergence
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < nnodes * tol:
            if normalized:
                # normalize vector
                try:
                    s = 1.0 / sqrt(sum(v**2 for v in x.values()))
                # this should never be zero?
                except ZeroDivisionError:
                    s = 1.0
            else:
                s = 1
            for n in x:
                x[n] *= s
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
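
A usage sketch mirroring the NetworkX documentation example; alpha must stay below the inverse of the largest adjacency eigenvalue (the golden ratio for a 4-node path graph):

G = nx.path_graph(4)
phi = (1 + 5 ** 0.5) / 2  # largest eigenvalue of the adjacency matrix
centrality = katz_centrality(G, alpha=1 / phi - 0.01)
print({n: round(c, 2) for n, c in centrality.items()})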
Example #8
import networkx as nx

def pagerank(G,
             alpha=0.85,
             personalization=None,
             max_iter=100,
             tol=1.0e-6,
             nstart=None,
             weight="weight",
             dangling=None):
    # PageRank value of each node
    if len(G) == 0:
        return {}

    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G

    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)
    else:
        s = float(sum(nstart.values()))
        x = {k: v / s for k, v in nstart.items()}

    if personalization is None:
        p = dict.fromkeys(W, 1.0 / N)
    else:
        s = float(sum(personalization.values()))
        p = {k: v / s for k, v in personalization.items()}

    if dangling is None:
        dangling_weights = p
    else:
        s = float(sum(dangling.values()))
        dangling_weights = {k: v / s for k, v in dangling.items()}
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights.get(
                n, 0) + (1.0 - alpha) * p.get(n, 0)
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
Example #9
import networkx as nx

def _hits_python(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True):
    if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph:
        raise Exception("hits() not defined for graphs with multiedges.")
    if len(G) == 0:
        return {}, {}
    # choose fixed starting vector if not given
    if nstart is None:
        h = dict.fromkeys(G, 1.0 / G.number_of_nodes())
    else:
        h = nstart
        # normalize starting vector
        s = 1.0 / sum(h.values())
        for k in h:
            h[k] *= s
    for _ in range(
            max_iter):  # power iteration: make up to max_iter iterations
        hlast = h
        h = dict.fromkeys(hlast.keys(), 0)
        a = dict.fromkeys(hlast.keys(), 0)
        # this "matrix multiply" looks odd because it is
        # doing a left multiply a^T=hlast^T*G
        for n in h:
            for nbr in G[n]:
                a[nbr] += hlast[n] * G[n][nbr].get("weight", 1)
        # now multiply h=Ga
        for n in h:
            for nbr in G[n]:
                h[n] += a[nbr] * G[n][nbr].get("weight", 1)
        # normalize vector
        s = 1.0 / max(h.values())
        for n in h:
            h[n] *= s
        # normalize vector
        s = 1.0 / max(a.values())
        for n in a:
            a[n] *= s
        # check convergence, l1 norm
        err = sum([abs(h[n] - hlast[n]) for n in h])
        if err < tol:
            break
    else:
        raise nx.PowerIterationFailedConvergence(max_iter)
    if normalized:
        s = 1.0 / sum(a.values())
        for n in a:
            a[n] *= s
        s = 1.0 / sum(h.values())
        for n in h:
            h[n] *= s
    return h, a
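
A quick usage check (this variant matches the pure-Python HITS in NetworkX):

G = nx.path_graph(4)
h, a = _hits_python(G)
print(h)
print(a)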
Example #10
import networkx as nx

def katz_centrality(G,
                    alpha,
                    beta=1.0,
                    max_iter=1000,
                    tol=1.0e-6,
                    weight=None,
                    normalized=True):
    if len(G) == 0:
        return {}

    nodes = G.number_of_nodes()
    dict_of_nodes = dict([(n, 0) for n in G])

    try:
        beta_dictionary = dict.fromkeys(G, float(beta))
    except (TypeError, ValueError, AttributeError):
        beta_dictionary = beta
        if set(beta) != set(G):
            raise nx.NetworkXError(
                'beta dictionary must have a value for every node')

    for i in range(max_iter):
        dict_of_nodes_helper = dict_of_nodes
        dict_of_nodes = dict.fromkeys(dict_of_nodes_helper, 0)
        for n in dict_of_nodes:
            for nbr in G[n]:
                dict_of_nodes[nbr] += dict_of_nodes_helper[n] * G[n][nbr].get(
                    weight, 1)
        for n in dict_of_nodes:
            dict_of_nodes[n] = alpha * dict_of_nodes[n] + beta_dictionary[n]
        err = sum([
            abs(dict_of_nodes[n] - dict_of_nodes_helper[n])
            for n in dict_of_nodes
        ])
        if err < nodes * tol:
            if normalized:
                min_item = min(dict_of_nodes.values())

                max_item = max(dict_of_nodes.values())
                for k, v in dict_of_nodes.items():
                    if dict_of_nodes[k] == min_item:
                        dict_of_nodes[k] = 0.0
                        continue
                    dict_of_nodes[k] = v / max_item

            return dict_of_nodes
    raise nx.PowerIterationFailedConvergence(max_iter)
Example #11
import networkx as nx
from math import sqrt

def eigenvector_centrality(G,
                           max_iter=100,
                           tol=1.0e-6,
                           nstart=None,
                           weight=None):
    '''compute eigenvector_centrality'''

    if len(G) == 0:
        raise nx.NetworkXPointlessConcept(
            'cannot compute centrality for the'
            ' null graph')
    # If no initial vector is provided, start with the all-ones vector.
    if nstart is None:
        nstart = {v: 1 for v in G}
    if all(v == 0 for v in nstart.values()):
        raise nx.NetworkXError(
            'initial vector cannot have all zero values')
    # Normalize the initial vector so that each entry is in [0, 1]. This is
    # guaranteed to never have a divide-by-zero error by the previous line.
    nstart_sum = sum(nstart.values())
    x = {k: v / nstart_sum for k, v in nstart.items()}
    nnodes = G.number_of_nodes()
    # make up to max_iter iterations
    for i in range(max_iter):
        xlast = x
        x = xlast.copy()  # Start with xlast times I to iterate with (A+I)
        # do the multiplication y^T = x^T A (left eigenvector)
        for n in x:
            for nbr in G[n]:
                w = G[n][nbr].get(weight, 1) if weight else 1
                x[nbr] += xlast[n] * w
        # Normalize the vector. The normalization denominator `norm`
        # should never be zero by the Perron--Frobenius
        # theorem. However, in case it is due to numerical error, we
        # assume the norm to be one instead.
        norm = sqrt(sum(z**2 for z in x.values())) or 1
        x = {k: v / norm for k, v in x.items()}
        # Check for convergence (in the L_1 norm).
        if sum(abs(x[n] - xlast[n]) for n in x) < nnodes * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
Example #12
import networkx as nx

def pagerank_edgetypes(D, edgetype_scale, max_iter=100, tol=1.0e-6, weight='weight'):
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    x = dict.fromkeys(W, 1.0 / N)
    p = dict.fromkeys(W, 1.0 / N)

    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0.0)
        weight_to_distribute = sum([(xlast[n] * W[n][nbr][weight] * edgetype_scale[W[n][nbr]['type']]) for n in x for nbr in W[n]])
        undistributed_weight = 1 - weight_to_distribute
        for n in x:
            for nbr in W[n]:
                x[nbr] += xlast[n] * W[n][nbr][weight] * edgetype_scale[W[n][nbr]['type']]
            x[n] += undistributed_weight * p.get(n, 0)

        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
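
A hypothetical usage sketch: the edge types and scale factors are invented, and the graph is a cycle so there are no dangling nodes (which this variant does not handle):

D = nx.DiGraph()
D.add_edge("a", "b", weight=1.0, type="cites")
D.add_edge("b", "c", weight=1.0, type="cites")
D.add_edge("c", "a", weight=1.0, type="mentions")
scale = {"cites": 1.0, "mentions": 0.5}
ranks = pagerank_edgetypes(D, scale)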
Example #13
import networkx as nx

def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True):
    # hub and authority values of each node
    if len(G) == 0:
        return {}, {}
    if nstart is None:
        h = dict.fromkeys(G, 1.0 / G.number_of_nodes())
    else:
        h = nstart
        s = 1.0 / sum(h.values())
        for k in h:
            h[k] *= s
    for _ in range(max_iter):
        hlast = h
        h = dict.fromkeys(hlast.keys(), 0)
        a = dict.fromkeys(hlast.keys(), 0)
        for n in h:
            for nbr in G[n]:
                a[nbr] += hlast[n] * G[n][nbr].get("weight", 1)
        for n in h:
            for nbr in G[n]:
                h[n] += a[nbr] * G[n][nbr].get("weight", 1)
        s = 1.0 / max(h.values())
        for n in h:
            h[n] *= s
        s = 1.0 / max(a.values())
        for n in a:
            a[n] *= s
        err = sum([abs(h[n] - hlast[n]) for n in h])
        if err < tol:
            break
    else:
        raise nx.PowerIterationFailedConvergence(max_iter)
    if normalized:
        s = 1.0 / sum(a.values())
        for n in a:
            a[n] *= s
        s = 1.0 / sum(h.values())
        for n in h:
            h[n] *= s
    return h, a
Example #14
import networkx as nx

def pagerank_scipy(G,
                   alpha=0.85,
                   personalization=None,
                   max_iter=100,
                   tol=1.0e-6,
                   weight='weight',
                   dangling=None):
    """Return the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph.  Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Damping parameter for PageRank, default=0.85.

    personalization: dict, optional
       The "personalization vector" consisting of a dictionary with a
       key for every graph node and nonzero personalization value for each
       node. By default, a uniform distribution is used.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.

    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.

    dangling: dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified). This must be selected to result in an irreducible transition
      matrix (see notes under google_matrix). It may be common to have the
      dangling dict to be the same as the personalization dict.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank_scipy(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation uses power iteration with a SciPy
    sparse matrix representation.

    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    See Also
    --------
    pagerank, pagerank_numpy, google_matrix

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    import numpy as np
    import scipy.sparse

    N = len(G)
    if N == 0:
        return {}

    nodelist = list(G)
    M = nx.to_scipy_sparse_matrix(G,
                                  nodelist=nodelist,
                                  weight=weight,
                                  dtype=float)
    S = np.array(M.sum(axis=1)).flatten()
    S[S != 0] = 1.0 / S[S != 0]
    Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
    M = Q * M

    # initial vector
    x = np.repeat(1.0 / N, N)

    # Personalization vector
    if personalization is None:
        p = np.repeat(1.0 / N, N)
    else:
        missing = set(nodelist) - set(personalization)
        if missing:
            raise nx.NetworkXError('Personalization vector dictionary '
                                   'must have a value for every node. '
                                   'Missing nodes %s' % missing)
        p = np.array([personalization[n] for n in nodelist], dtype=float)
        p = p / p.sum()

    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        missing = set(nodelist) - set(dangling)
        if missing:
            raise nx.NetworkXError('Dangling node dictionary '
                                   'must have a value for every node. '
                                   'Missing nodes %s' % missing)
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = np.array([dangling[n] for n in nodelist],
                                    dtype=float)
        dangling_weights /= dangling_weights.sum()
    is_dangling = np.where(S == 0)[0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = alpha * (x * M + sum(x[is_dangling]) * dangling_weights) + \
            (1 - alpha) * p
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < N * tol:
            return dict(zip(nodelist, map(float, x)))
    raise nx.PowerIterationFailedConvergence(max_iter)
Example #15
import networkx as nx

def pagerank(G,
             alpha=0.85,
             personalization=None,
             max_iter=100,
             tol=1.0e-6,
             nstart=None,
             weight='weight',
             dangling=None):
    """Return the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph.  Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Damping parameter for PageRank, default=0.85.

    personalization: dict, optional
      The "personalization vector" consisting of a dictionary with a
      key for every graph node and nonzero personalization value for each node.
      By default, a uniform distribution is used.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.

    nstart : dictionary, optional
      Starting value of PageRank iteration for each node.

    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.

    dangling: dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified). This must be selected to result in an irreducible transition
      matrix (see notes under google_matrix). It may be common to have the
      dangling dict to be the same as the personalization dict.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence.  The iteration will stop after
    an error tolerance of ``len(G) * tol`` has been reached. If the
    number of iterations exceed `max_iter`, a
    :exc:`networkx.exception.PowerIterationFailedConvergence` exception
    is raised.

    The PageRank algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs by converting each edge in the
    directed graph to two edges.

    See Also
    --------
    pagerank_numpy, pagerank_scipy, google_matrix

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf

    """
    if len(G) == 0:
        return {}

    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G

    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    # Choose fixed starting vector if not given
    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)
    else:
        # Normalized nstart vector
        s = float(sum(nstart.values()))
        x = dict((k, v / s) for k, v in nstart.items())

    if personalization is None:
        # Assign uniform personalization vector if not given
        p = dict.fromkeys(W, 1.0 / N)
    else:
        missing = set(G) - set(personalization)
        if missing:
            raise nx.NetworkXError('Personalization dictionary '
                                   'must have a value for every node. '
                                   'Missing nodes %s' % missing)
        s = float(sum(personalization.values()))
        p = dict((k, v / s) for k, v in personalization.items())

    if dangling is None:
        # Use personalization vector if dangling vector not specified
        dangling_weights = p
    else:
        missing = set(G) - set(dangling)
        if missing:
            raise nx.NetworkXError('Dangling node dictionary '
                                   'must have a value for every node. '
                                   'Missing nodes %s' % missing)
        s = float(sum(dangling.values()))
        dangling_weights = dict((k, v / s) for k, v in dangling.items())
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T=xlast^T*W
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]
        # check convergence, l1 norm
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
Example #16
import networkx as nx
from math import sqrt

def eigenvector_centrality(G,
                           max_iter=100,
                           tol=1.0e-6,
                           nstart=None,
                           weight=None):
    r"""Compute the eigenvector centrality for the graph `G`.

    Eigenvector centrality computes the centrality for a node based on the
    centrality of its neighbors. The eigenvector centrality for node $i$ is

    .. math::

        Ax = \lambda x

    where $A$ is the adjacency matrix of the graph `G` with eigenvalue
    $\lambda$. By virtue of the Perron–Frobenius theorem, there is
    a unique and positive solution if $\lambda$ is the largest
    eigenvalue associated with the eigenvector of the adjacency matrix
    $A$ ([2]_).

    Parameters
    ----------
    G : graph
      A networkx graph

    max_iter : integer, optional (default=100)
      Maximum number of iterations in power method.

    tol : float, optional (default=1.0e-6)
      Error tolerance used to check convergence in power method iteration.

    nstart : dictionary, optional (default=None)
      Starting value of eigenvector iteration for each node.

    weight : None or string, optional (default=None)
      If None, all edge weights are considered equal.
      Otherwise holds the name of the edge attribute used as weight.

    Returns
    -------
    nodes : dictionary
       Dictionary of nodes with eigenvector centrality as the value.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> centrality = nx.eigenvector_centrality(G)
    >>> sorted((v, '{:0.2f}'.format(c)) for v, c in centrality.items())
    [(0, '0.37'), (1, '0.60'), (2, '0.60'), (3, '0.37')]

    Raises
    ------
    NetworkXPointlessConcept
        If the graph `G` is the null graph.

    NetworkXError
        If each value in `nstart` is zero.

    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    See Also
    --------
    eigenvector_centrality_numpy
    pagerank
    hits

    Notes
    -----
    The measure was introduced by [1]_ and is discussed in [2]_.

    The power iteration method is used to compute the eigenvector and
    convergence is **not** guaranteed. Our method stops after ``max_iter``
    iterations or when the change in the computed vector between two
    iterations is smaller than an error tolerance of
    ``G.number_of_nodes() * tol``. This implementation uses ($A + I$)
    rather than the adjacency matrix $A$ because it shifts the spectrum
    to enable discerning the correct eigenvector even for networks with
    multiple dominant eigenvalues.

    For directed graphs this is "left" eigenvector centrality which corresponds
    to the in-edges in the graph. For out-edges eigenvector centrality
    first reverse the graph with ``G.reverse()``.

    References
    ----------
    .. [1] Phillip Bonacich.
       "Power and Centrality: A Family of Measures."
       *American Journal of Sociology* 92(5):1170–1182, 1986
       <http://www.leonidzhukov.net/hse/2014/socialnetworks/papers/Bonacich-Centrality.pdf>
    .. [2] Mark E. J. Newman.
       *Networks: An Introduction.*
       Oxford University Press, USA, 2010, pp. 169.

    """
    if len(G) == 0:
        raise nx.NetworkXPointlessConcept('cannot compute centrality for the'
                                          ' null graph')
    # If no initial vector is provided, start with the all-ones vector.
    if nstart is None:
        nstart = {v: 1 for v in G}
    if all(v == 0 for v in nstart.values()):
        raise nx.NetworkXError('initial vector cannot have all zero values')
    # Normalize the initial vector so that each entry is in [0, 1]. This is
    # guaranteed to never have a divide-by-zero error by the previous line.
    x = {k: v / sum(nstart.values()) for k, v in nstart.items()}
    nnodes = G.number_of_nodes()
    # make up to max_iter iterations
    for i in range(max_iter):
        xlast = x
        x = xlast.copy()  # Start with xlast times I to iterate with (A+I)
        # do the multiplication y^T = x^T A (left eigenvector)
        for n in x:
            for nbr in G[n]:
                x[nbr] += xlast[n] * G[n][nbr].get(weight, 1)
        # Normalize the vector. The normalization denominator `norm`
        # should never be zero by the Perron--Frobenius
        # theorem. However, in case it is due to numerical error, we
        # assume the norm to be one instead.
        norm = sqrt(sum(z**2 for z in x.values())) or 1
        x = {k: v / norm for k, v in x.items()}
        # Check for convergence (in the L_1 norm).
        if sum(abs(x[n] - xlast[n]) for n in x) < nnodes * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
Example #17
import networkx as nx

def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True):
    """Returns HITS hubs and authorities values for nodes.

    The HITS algorithm computes two numbers for a node.
    Authorities estimates the node value based on the incoming links.
    Hubs estimates the node value based on outgoing links.

    Parameters
    ----------
    G : graph
      A NetworkX graph

    max_iter : integer, optional
      Maximum number of iterations in power method.

    tol : float, optional
      Error tolerance used to check convergence in power method iteration.

    nstart : dictionary, optional
      Starting value of each node for power method iteration.

    normalized : bool (default=True)
       Normalize results by the sum of all of the values.

    Returns
    -------
    (hubs,authorities) : two-tuple of dictionaries
       Two dictionaries keyed by node containing the hub and authority
       values.

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> h, a = nx.hits(G)

    Notes
    -----
    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence.  The iteration will stop
    after max_iter iterations or an error tolerance of
    number_of_nodes(G)*tol has been reached.

    The HITS algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Jon Kleinberg,
       Authoritative sources in a hyperlinked environment
       Journal of the ACM 46 (5): 604-32, 1999.
       doi:10.1145/324133.324140.
       http://www.cs.cornell.edu/home/kleinber/auth.pdf.
    """
    if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph:
        raise Exception("hits() not defined for graphs with multiedges.")
    if len(G) == 0:
        return {}, {}
    # choose fixed starting vector if not given
    if nstart is None:
        h = dict.fromkeys(G, 1.0 / G.number_of_nodes())
    else:
        h = nstart
        # normalize starting vector
        s = 1.0 / sum(h.values())
        for k in h:
            h[k] *= s
    for _ in range(max_iter):  # power iteration: make up to max_iter iterations
        hlast = h
        h = dict.fromkeys(hlast.keys(), 0)
        a = dict.fromkeys(hlast.keys(), 0)
        # this "matrix multiply" looks odd because it is
        # doing a left multiply a^T=hlast^T*G
        for n in h:
            for nbr in G[n]:
                a[nbr] += hlast[n] * G[n][nbr].get("weight", 1)
        # now multiply h=Ga
        for n in h:
            for nbr in G[n]:
                h[n] += a[nbr] * G[n][nbr].get("weight", 1)
        # normalize vector
        s = 1.0 / max(h.values())
        for n in h:
            h[n] *= s
        # normalize vector
        s = 1.0 / max(a.values())
        for n in a:
            a[n] *= s
        # check convergence, l1 norm
        err = sum([abs(h[n] - hlast[n]) for n in h])
        if err < tol:
            break
    else:
        raise nx.PowerIterationFailedConvergence(max_iter)
    if normalized:
        s = 1.0 / sum(a.values())
        for n in a:
            a[n] *= s
        s = 1.0 / sum(h.values())
        for n in h:
            h[n] *= s
    return h, a
Example #18
import networkx as nx

def hits_scipy(G, max_iter=100, tol=1.0e-6, normalized=True):
    """Returns HITS hubs and authorities values for nodes.

    The HITS algorithm computes two numbers for a node.
    Authorities estimates the node value based on the incoming links.
    Hubs estimates the node value based on outgoing links.

    Parameters
    ----------
    G : graph
      A NetworkX graph

    max_iter : integer, optional
      Maximum number of iterations in power method.

    tol : float, optional
      Error tolerance used to check convergence in power method iteration.

    normalized : bool (default=True)
       Normalize results by the sum of all of the values.

    Returns
    -------
    (hubs,authorities) : two-tuple of dictionaries
       Two dictionaries keyed by node containing the hub and authority
       values.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> h, a = nx.hits(G)

    Notes
    -----
    This implementation uses SciPy sparse matrices.

    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence.  The iteration will stop
    after max_iter iterations or an error tolerance of
    number_of_nodes(G)*tol has been reached.

    The HITS algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs.

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Jon Kleinberg,
       Authoritative sources in a hyperlinked environment
       Journal of the ACM 46 (5): 604-632, 1999.
       doi:10.1145/324133.324140.
       http://www.cs.cornell.edu/home/kleinber/auth.pdf.
    """
    try:
        import numpy as np
    except ImportError as e:
        raise ImportError(
            "hits_scipy() requires SciPy and NumPy:"
            "http://scipy.org/ http://numpy.org/"
        ) from e
    if len(G) == 0:
        return {}, {}
    M = nx.to_scipy_sparse_matrix(G, nodelist=list(G))
    (n, m) = M.shape  # should be square
    A = M.T * M  # authority matrix
    x = np.ones((n, 1)) / n  # initial guess
    # power iteration on authority matrix
    i = 0
    while True:
        xlast = x
        x = A * x
        x = x / x.max()
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < tol:
            break
        if i > max_iter:
            raise nx.PowerIterationFailedConvergence(max_iter)
        i += 1

    a = np.asarray(x).flatten()
    # h=M*a
    h = np.asarray(M * a).flatten()
    if normalized:
        h = h / h.sum()
        a = a / a.sum()
    hubs = dict(zip(G, map(float, h)))
    authorities = dict(zip(G, map(float, a)))
    return hubs, authorities
Example #19
import math
import networkx as nx

def katz_centrality(
    G,
    alpha=0.1,
    beta=1.0,
    max_iter=1000,
    tol=1.0e-6,
    nstart=None,
    normalized=True,
    weight=None,
):
    r"""Compute the Katz centrality for the nodes of the graph G.

    Katz centrality computes the centrality for a node based on the centrality
    of its neighbors. It is a generalization of the eigenvector centrality. The
    Katz centrality for node $i$ is

    .. math::

        x_i = \alpha \sum_{j} A_{ij} x_j + \beta,

    where $A$ is the adjacency matrix of graph G with eigenvalues $\lambda$.

    The parameter $\beta$ controls the initial centrality and

    .. math::

        \alpha < \frac{1}{\lambda_{\max}}.

    Katz centrality computes the relative influence of a node within a
    network by measuring the number of the immediate neighbors (first
    degree nodes) and also all other nodes in the network that connect
    to the node under consideration through these immediate neighbors.

    Extra weight can be provided to immediate neighbors through the
    parameter $\beta$.  Connections made with distant neighbors
    are, however, penalized by an attenuation factor $\alpha$ which
    should be strictly less than the inverse largest eigenvalue of the
    adjacency matrix in order for the Katz centrality to be computed
    correctly. More information is provided in [1]_.

    Parameters
    ----------
    G : graph
      A NetworkX graph.

    alpha : float
      Attenuation factor

    beta : scalar or dictionary, optional (default=1.0)
      Weight attributed to the immediate neighborhood. If not a scalar, the
      dictionary must have a value for every node.

    max_iter : integer, optional (default=1000)
      Maximum number of iterations in power method.

    tol : float, optional (default=1.0e-6)
      Error tolerance used to check convergence in power method iteration.

    nstart : dictionary, optional
      Starting value of Katz iteration for each node.

    normalized : bool, optional (default=True)
      If True normalize the resulting values.

    weight : None or string, optional (default=None)
      If None, all edge weights are considered equal.
      Otherwise holds the name of the edge attribute used as weight.
      In this measure the weight is interpreted as the connection strength.

    Returns
    -------
    nodes : dictionary
       Dictionary of nodes with Katz centrality as the value.

    Raises
    ------
    NetworkXError
       If the parameter `beta` is not a scalar but lacks a value for at least
       one node

    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    Examples
    --------
    >>> import math
    >>> G = nx.path_graph(4)
    >>> phi = (1 + math.sqrt(5)) / 2.0  # largest eigenvalue of adj matrix
    >>> centrality = nx.katz_centrality(G, 1 / phi - 0.01)
    >>> for n, c in sorted(centrality.items()):
    ...     print(f"{n} {c:.2f}")
    0 0.37
    1 0.60
    2 0.60
    3 0.37

    See Also
    --------
    katz_centrality_numpy
    eigenvector_centrality
    eigenvector_centrality_numpy
    pagerank
    hits

    Notes
    -----
    Katz centrality was introduced by [2]_.

    This algorithm uses the power method to find the eigenvector
    corresponding to the largest eigenvalue of the adjacency matrix of ``G``.
    The parameter ``alpha`` should be strictly less than the inverse of largest
    eigenvalue of the adjacency matrix for the algorithm to converge.
    You can use ``max(nx.adjacency_spectrum(G))`` to get $\lambda_{\max}$ the largest
    eigenvalue of the adjacency matrix.
    The iteration will stop after ``max_iter`` iterations or an error tolerance of
    ``number_of_nodes(G) * tol`` has been reached.

    When $\alpha = 1/\lambda_{\max}$ and $\beta=0$, Katz centrality is the same
    as eigenvector centrality.

    For directed graphs this finds "left" eigenvectors which corresponds
    to the in-edges in the graph. For out-edges Katz centrality
    first reverse the graph with ``G.reverse()``.

    References
    ----------
    .. [1] Mark E. J. Newman:
       Networks: An Introduction.
       Oxford University Press, USA, 2010, p. 720.
    .. [2] Leo Katz:
       A New Status Index Derived from Sociometric Index.
       Psychometrika 18(1):39–43, 1953
       https://link.springer.com/content/pdf/10.1007/BF02289026.pdf
    """
    if len(G) == 0:
        return {}

    nnodes = G.number_of_nodes()

    if nstart is None:
        # choose starting vector with entries of 0
        x = {n: 0 for n in G}
    else:
        x = nstart

    try:
        b = dict.fromkeys(G, float(beta))
    except (TypeError, ValueError, AttributeError) as err:
        b = beta
        if set(beta) != set(G):
            raise nx.NetworkXError("beta dictionary "
                                   "must have a value for every node") from err

    # make up to max_iter iterations
    for i in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast, 0)
        # do the multiplication y^T = alpha * x^T A + beta
        for n in x:
            for nbr in G[n]:
                x[nbr] += xlast[n] * G[n][nbr].get(weight, 1)
        for n in x:
            x[n] = alpha * x[n] + b[n]

        # check convergence
        error = sum(abs(x[n] - xlast[n]) for n in x)
        if error < nnodes * tol:
            if normalized:
                # normalize vector
                try:
                    s = 1.0 / math.hypot(*x.values())
                # this should never be zero?
                except ZeroDivisionError:
                    s = 1.0
            else:
                s = 1
            for n in x:
                x[n] *= s
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
Example #20
import networkx as nx

def hits_scipy(G, max_iter=100, tol=1.0e-6, nstart=None, normalized=True):
    """Returns HITS hubs and authorities values for nodes.

    .. deprecated:: 2.6

       hits_scipy is deprecated and will be removed in networkx 3.0

    The HITS algorithm computes two numbers for a node.
    Authorities estimates the node value based on the incoming links.
    Hubs estimates the node value based on outgoing links.

    Parameters
    ----------
    G : graph
      A NetworkX graph

    max_iter : integer, optional
      Maximum number of iterations in power method.

    tol : float, optional
      Error tolerance used to check convergence in power method iteration.

    nstart : dictionary, optional
      Starting value of each node for power method iteration.

    normalized : bool (default=True)
       Normalize results by the sum of all of the values.

    Returns
    -------
    (hubs,authorities) : two-tuple of dictionaries
       Two dictionaries keyed by node containing the hub and authority
       values.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> h, a = nx.hits(G)

    Notes
    -----
    This implementation uses SciPy sparse matrices.

    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence.  The iteration will stop
    after max_iter iterations or an error tolerance of
    number_of_nodes(G)*tol has been reached.

    The HITS algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs.

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Jon Kleinberg,
       Authoritative sources in a hyperlinked environment
       Journal of the ACM 46 (5): 604-632, 1999.
       doi:10.1145/324133.324140.
       http://www.cs.cornell.edu/home/kleinber/auth.pdf.
    """
    import numpy as np
    import warnings

    warnings.warn(
        ("networkx.hits_scipy is deprecated and will be removed"
         "in NetworkX 3.0, use networkx.hits instead."),
        DeprecationWarning,
        stacklevel=2,
    )

    if len(G) == 0:
        return {}, {}
    A = nx.to_scipy_sparse_array(G, nodelist=list(G))
    (n, m) = A.shape  # should be square
    ATA = A.T @ A  # authority matrix
    # choose fixed starting vector if not given
    if nstart is None:
        x = np.ones((n, 1)) / n
    else:
        x = np.array([nstart.get(n, 0) for n in list(G)], dtype=float)
        x = x / x.sum()

    # power iteration on authority matrix
    i = 0
    while True:
        xlast = x
        x = ATA @ x
        x /= x.max()
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < tol:
            break
        if i > max_iter:
            raise nx.PowerIterationFailedConvergence(max_iter)
        i += 1

    a = x.flatten()
    h = A @ a
    if normalized:
        h = h / h.sum()
        a = a / a.sum()
    hubs = dict(zip(G, map(float, h)))
    authorities = dict(zip(G, map(float, a)))
    return hubs, authorities
Example #21
    def _compute_page_rank(self, max_iter=100):
        """Return the PageRank of the nodes in the graph.

        Adapted to return the number of iterations necessary to compute the Page Rank

        Source
        ------
        github.com/networkx/networkx/blob/master/networkx/algorithms/link_analysis/pagerank_alg.py

        """
        # Init graph structure
        if self._input_graph is None:
            # Compute input graph if not defined
            self.draw_input_graph(show_graph=False)

        W = nx.stochastic_graph(self._input_graph, weight=None)
        N = W.number_of_nodes()

        # Init fixed-point constants
        d = self._to_fp(self._get_damping_factor())
        tol = self._to_fp(TOL)
        ZERO = self._to_fp(0)
        ONE = self._to_fp(1.)
        N = self._to_fp(N)
        damping_sum = self._to_fp(self._get_damping_sum())

        # Iterate up to max_iter iterations
        x = dict.fromkeys(W, ONE / N)
        for iter in range(max_iter):
            logger.debug('\n===== TIME STEP = {} ====='.format(iter))
            xlast = x
            x = dict.fromkeys(xlast.keys(), ZERO)

            for node in x:
                pkt = xlast[node] / self._to_fp(len(W[node]))
                logger.debug('[t=%04d|#%3s] Sending pkt %f[%s]' %
                             (iter, node, pkt, self._to_hex(pkt)))

                # Exchange ranks
                for conn_node in W[node]:  # edge: node -> conn_node
                    prev = x[conn_node]
                    # Simulates payload-lossy encoding of the iteration
                    # See c_models/src/common/in_spikes.h:in_spikes_payload_format
                    x[conn_node] += ((pkt >> ITER_BITS) << ITER_BITS)
                    logger.debug("[idx=%3s] %f[%s] + %f[%s] = %f[%s]" %
                                 (conn_node, prev, self._to_hex(prev), pkt,
                                  self._to_hex(pkt), x[conn_node],
                                  self._to_hex(x[conn_node])))

            # Compute dangling factor
            if d != ONE:
                for node in x:
                    prev = x[node]
                    x[node] = damping_sum + d * x[node]
                    logger.debug(
                        "[idx=%3s] %f[%s] * %f[%s] + %f[%s] = %f[%s]" %
                        (node, d, self._to_hex(d), prev, self._to_hex(prev),
                         damping_sum, self._to_hex(damping_sum), x[node],
                         self._to_hex(x[node])))

            # Check convergence, l1 norm
            err = sum([abs(x[node] - xlast[node]) for node in x])
            if err < N * tol:
                if self._labels:
                    x = np.array([np.float64(x[v]) for v in self._labels])
                return x, iter + 1  # iter t+1 happens at the end of time t
        raise nx.PowerIterationFailedConvergence(max_iter)
Example #22
import networkx as nx

def signed_hits(G, max_iter=100, tol=1.0e-8, normalized=True):

    if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph:
        raise Exception("hits() not defined for graphs with multiedges.")
    if len(G) == 0:
        return {}, {}
    # choose fixed starting vector if not given

    h_p = dict.fromkeys(G, 1.0 / G.number_of_nodes())
    h_n = dict.fromkeys(G, -1.0 / G.number_of_nodes())
    # h and a must be distinct dicts; aliasing both to h_p would let the
    # authority update overwrite the hub update below
    h = dict(h_p)
    a = dict(h_p)
    for _ in range(max_iter):  # power iteration: make up to max_iter iterations
        h_p_last = h_p
        h_n_last = h_n

        h_p = dict.fromkeys(h_p_last.keys(), 0)
        h_n = dict.fromkeys(h_n_last.keys(), 0)
        a_p = dict.fromkeys(h_p_last.keys(), 0)
        a_n = dict.fromkeys(h_n_last.keys(), 0)

        # this "matrix multiply" looks odd because it is
        # doing a left multiply a^T=hlast^T*G

        for u in h_p:
            for v in G.pred[u]:
                if G[v][u]['weight'] >= 0:
                    a_p[u] += h_p_last[v] * G[v][u]['weight']
                else:
                    a_n[u] -= h_n_last[v] * G[v][u]['weight']
        for u in h_p:
            for v in G.succ[u]:
                if G[u][v]['weight'] >= 0:
                    h_p[u] += a_p[v] * G[u][v]['weight']
                else:
                    h_n[u] -= a_n[v] * G[u][v]['weight']

        # normalize vector
        s = 1.0 / max(h_p.values())
        for n in h_p:
            h_p[n] *= s
        # normalize vector
        s = -1.0 / min(h_n.values())
        for n in h_n:
            h_n[n] *= s
        # normalize vector
        s = 1.0 / max(a_p.values())
        for n in a_p:
            a_p[n] *= s
        # normalize vector
        s = -1.0 / min(a_n.values())
        for n in a_n:
            a_n[n] *= s

        for key in h:
            h[key] = h_p[key] - h_n[key]
            a[key] = a_p[key] - a_n[key]

        # check convergence, l1 norm
        err = sum([abs(h_p[n] - h_p_last[n]) for n in h_p]
                  + [abs(h_n[n] - h_n_last[n]) for n in h_n])
        if err < tol:
            break
    else:
        raise nx.PowerIterationFailedConvergence(max_iter)
    if normalized:
        s = 1.0 / sum(a.values())
        for n in a:
            a[n] *= s
        s = 1.0 / sum(h.values())
        for n in h:
            h[n] *= s
    return h, a
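# A minimal usage sketch for signed_hits above; the three-node graph and its
# signed edge weights are illustrative, not from the original source. Note
# that every edge needs an explicit 'weight' attribute, since the function
# indexes G[v][u]['weight'] directly.
#
#     >>> G = nx.DiGraph()
#     >>> G.add_edge("a", "b", weight=1.0)   # positive endorsement
#     >>> G.add_edge("b", "c", weight=-2.0)  # negative endorsement
#     >>> G.add_edge("c", "a", weight=0.5)
#     >>> hubs, auths = signed_hits(G)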
Beispiel #23
0
def futureclusterank_CT_scipy(G, alpha=0.2, gamma=0.7, delta=0.1, personalization=None, clusterization=None,
                   max_iter=100, tol=1.0e-6, weight='weight',
                   dangling=None):
    """Return the FutureRank CT of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph.  Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Parameter for FutureClusterRank CT, default=0.2
      
    gamma : float, optional
      Parameter for FutureClusterRank CT, default=0.7
      
    delta : float, optional
      Parameter for FutureClusterRank CT, default=0.1      

    personalization: dict, optional
      The "personalization vector" consisting of a dictionary with a
      key for some subset of graph nodes and a personalization value for
      each of those. At least one personalization value must be non-zero.
      Nodes missing from the dict default to 0.5 before normalization.
      If no dict is given, a uniform distribution is used.

    clusterization: dict, optional
      The "clusterization vector" consisting of a dictionary with a
      key for some subset of graph nodes and a clusterization value for
      each of those. At least one clusterization value must be non-zero.
      Nodes missing from the dict default to 0.33 before normalization.
      If no dict is given, a uniform distribution is used.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.

    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.

    dangling: dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified). This must be selected to result in an irreducible transition
      matrix (see notes under google_matrix). It is common for the dangling
      dict to be the same as the personalization dict.

    Returns
    -------
    FutureClusterRank CT : dictionary
       Dictionary of nodes with FutureClusterRank CT score as value

    """
    import numpy as np
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    N = len(G)
    if N == 0:
        return {}

    nodelist = list(G)
    M = nx.to_scipy_sparse_array(G, nodelist=nodelist, weight=weight,
                                 dtype=float)
    # row-normalize M into a (right) stochastic transition matrix
    S = M.sum(axis=1)
    S[S != 0] = 1.0 / S[S != 0]
    Q = sp.sparse.csr_array(sp.sparse.spdiags(S.T, 0, *M.shape))
    M = Q @ M

    # initial vector
    x = np.repeat(1.0 / N, N)

    # Personalization vector (nodes missing from the dict default to 0.5)
    if personalization is None:
        p = np.repeat(1.0 / N, N)
    else:
        p = np.array([personalization.get(n, 0.5) for n in nodelist],
                     dtype=float)
        p = p / p.sum()

    # Clusterization vector (nodes missing from the dict default to 0.33)
    if clusterization is None:
        q = np.repeat(1.0 / N, N)
    else:
        q = np.array([clusterization.get(n, 0.33) for n in nodelist],
                     dtype=float)
        q = q / q.sum()

    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = np.array([dangling.get(n, 0) for n in nodelist],
                                    dtype=float)
        dangling_weights /= dangling_weights.sum()
    is_dangling = np.where(S == 0)[0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
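        # FutureClusterRank CT update, as read off the code below: a
        # PageRank-style random-walk term weighted by alpha, the
        # personalization vector weighted by gamma, the clusterization vector
        # weighted by delta, and a uniform teleport term carrying the
        # remaining 1 - alpha - gamma - delta mass.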
        x = alpha * (x @ M + sum(x[is_dangling]) * dangling_weights) \
            + gamma * p + delta * q \
            + (1 - alpha - gamma - delta) * np.repeat(1.0 / N, N)
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < N * tol:
            return dict(zip(nodelist, map(float, x)))
    raise nx.PowerIterationFailedConvergence(max_iter)
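# A minimal usage sketch for futureclusterank_CT_scipy above; the small
# citation graph and the personalization/clusterization dicts are
# illustrative, not from the original source:
#
#     >>> G = nx.DiGraph([(1, 2), (2, 3), (3, 1), (4, 1)])
#     >>> ranks = futureclusterank_CT_scipy(
#     ...     G, alpha=0.2, gamma=0.7, delta=0.1,
#     ...     personalization={1: 1.0, 2: 0.5},  # e.g. recency scores
#     ...     clusterization={1: 0.8, 4: 0.4})   # e.g. cluster quality scores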
Beispiel #24
0
def pagerank_scipy(
    G,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1.0e-6,
    nstart=None,
    weight="weight",
    dangling=None,
):
    """Returns the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph.  Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Damping parameter for PageRank, default=0.85.

    personalization: dict, optional
      The "personalization vector" consisting of a dictionary with a
      key for some subset of graph nodes and a personalization value for
      each of those. At least one personalization value must be non-zero.
      If not specified, a node's personalization value will be zero.
      By default, a uniform distribution is used.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.

    nstart : dictionary, optional
      Starting value of PageRank iteration for each node.

    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.

    dangling: dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified). This must be selected to result in an irreducible transition
      matrix (see notes under google_matrix). It is common for the dangling
      dict to be the same as the personalization dict.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank_scipy(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation uses power iteration with a SciPy
    sparse matrix representation.

    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    See Also
    --------
    pagerank, pagerank_numpy, google_matrix

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    msg = "networkx.pagerank_scipy is deprecated and will be removed in NetworkX 3.0, use networkx.pagerank instead."
    warn(msg, DeprecationWarning, stacklevel=2)
    import numpy as np
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    N = len(G)
    if N == 0:
        return {}

    nodelist = list(G)
    A = nx.to_scipy_sparse_array(G,
                                 nodelist=nodelist,
                                 weight=weight,
                                 dtype=float)
    S = A.sum(axis=1)
    S[S != 0] = 1.0 / S[S != 0]
    # TODO: csr_array
    Q = sp.sparse.csr_array(sp.sparse.spdiags(S.T, 0, *A.shape))
    A = Q @ A

    # initial vector
    if nstart is None:
        x = np.repeat(1.0 / N, N)
    else:
        x = np.array([nstart.get(n, 0) for n in nodelist], dtype=float)
        x = x / x.sum()

    # Personalization vector
    if personalization is None:
        p = np.repeat(1.0 / N, N)
    else:
        p = np.array([personalization.get(n, 0) for n in nodelist],
                     dtype=float)
        if p.sum() == 0:
            raise ZeroDivisionError
        p = p / p.sum()
    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = np.array([dangling.get(n, 0) for n in nodelist],
                                    dtype=float)
        dangling_weights /= dangling_weights.sum()
    is_dangling = np.where(S == 0)[0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = alpha * (x @ A +
                     sum(x[is_dangling]) * dangling_weights) + (1 - alpha) * p
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < N * tol:
            return dict(zip(nodelist, map(float, x)))
    raise nx.PowerIterationFailedConvergence(max_iter)
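# A short usage sketch exercising the personalization and dangling arguments
# of pagerank_scipy above; the graph and both dicts are illustrative, not
# from the original source. Node 3 has no outedges, so it is a dangling node
# whose rank mass is redistributed according to `dangling`:
#
#     >>> G = nx.DiGraph([(0, 1), (1, 2), (2, 3)])
#     >>> pr = pagerank_scipy(G, alpha=0.85,
#     ...                     personalization={0: 1.0, 1: 1.0},
#     ...                     dangling={0: 1.0})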