Example #1
0
def sum_of_shortest_paths(G, S):
    r"""Returns the difference between the sum of lengths of all pairs shortest paths in G and the one in G\S.
    The experiment metrics in [1]_

    Parameters
    ----------
    G: easygraph.Graph or easygraph.DiGraph

    S: list of int
        A list of nodes which are structural hole spanners.

    Returns
    -------
    differ_between_sum : int
        The difference between the sum of lengths of all pairs shortest paths in G and the one in G\S.
        C(G/S)-C(G)

    Examples
    --------
    >>> G_t=eg.datasets.get_graph_blogcatalog()
    >>> S_t=eg.AP_Greedy(G_t, 10000)
    >>> diff = sum_of_shortest_paths(G_t, S_t)
    >>> print(diff)

    References
    ----------
    .. [1] https://dl.acm.org/profile/81484650642

    """

    def _capped_path_sum(graph):
        # Sum of all-pairs shortest-path lengths, where each unreachable
        # (infinite) pair is replaced by ceil(n^3 / 3) so that disconnected
        # pairs contribute a large finite penalty instead of inf.
        cap = math.ceil((graph.number_of_nodes() ** 3) / 3)
        total = 0
        for row in eg.Floyd(graph).values():
            for dist in row.values():
                total += cap if math.isinf(dist) else dist
        return total

    sum_G = _capped_path_sum(G)
    # Remove the spanner nodes on a copy so the caller's graph is untouched.
    G_S = G.copy()
    G_S.remove_nodes(S)
    return _capped_path_sum(G_S) - sum_G
Example #2
0
def CombineNodes(records, G, label_dict, score_dict, node_dict,
                 Next_label_dict, nodes, degrees, distance_dict):
    """Collapse each current label community of G into a single super-node.

    Records the current label -> members grouping in ``records``, builds a
    new graph whose nodes are the communities (indexed 0..k-1) and whose
    edge weights are the summed weights of the original inter-community
    edges, and rebuilds every bookkeeping structure for the collapsed graph.

    Returns the same 9-tuple of arguments, rebound to the collapsed state.
    """
    # Group the original nodes by their current label.
    onerecord = dict()
    for node, label in label_dict.items():
        onerecord.setdefault(label, []).append(node)
    records.append(onerecord)

    Gx = eg.Graph()
    label_dictx = dict()
    score_dictx = dict()
    node_dictx = dict()
    nodesx = []
    # Map every original node to the index of its community super-node.
    group_of = dict()
    for cnt, record_label in enumerate(onerecord):
        nodesx.append(cnt)
        label_dictx[cnt] = record_label
        score_dictx[record_label] = score_dict[record_label]
        node_dictx[record_label] = cnt
        for member in onerecord[record_label]:
            group_of[member] = cnt

    # Accumulate inter-community edge weights in a single pass over the
    # adjacency (O(|E|)) instead of scanning every community pair times
    # every member pair; the resulting sums are identical.
    edge = dict()
    adj = G.adj
    for unode in adj:
        gi = group_of.get(unode)
        if gi is None:
            # node not present in label_dict -- ignored, as before
            continue
        for vnode, attr in adj[unode].items():
            gj = group_of.get(vnode)
            if gj is None or gi == gj:
                continue
            edge[(gi, gj)] = edge.get((gi, gj), 0) + attr.get("weight", 1)
    # Each undirected edge was accumulated in both directions; add it once.
    for (u, v), w in edge.items():
        if u < v:
            Gx.add_edge(u, v, weight=w)

    G = Gx
    label_dict = label_dictx
    score_dict = score_dictx
    node_dict = node_dictx
    # NOTE: Next_label_dict intentionally aliases label_dictx, as in the
    # original implementation; HANP re-syncs them every sweep.
    Next_label_dict = label_dictx
    nodes = nodesx
    degrees = G.degree()
    distance_dict = eg.Floyd(G)
    return records, G, label_dict, score_dict, node_dict, Next_label_dict, nodes, degrees, distance_dict
Example #3
0
def bounded_inverse_closeness_centrality(G, v, l):
    """Return the sum of shortest-path distances from ``v`` to the nodes
    reachable within ``l`` hops, normalized by ``len(G) - 1``.

    Performs a BFS from ``v`` and stops as soon as it dequeues a node at
    depth ``l + 1``; distances come from the all-pairs Floyd matrix.
    """
    from collections import deque

    # All-pairs shortest paths; only the row for v is used here.
    shortest_path = eg.Floyd(G)
    # deque gives O(1) popleft; list.pop(0) was O(n) per dequeue.
    queue = deque([v])
    seen = {v}
    result = 0
    while queue:
        vertex = queue.popleft()
        # Stop at the first node strictly beyond depth l.
        # NOTE(review): assumes unweighted edges, so the Floyd distance
        # equals the BFS level -- confirm for weighted graphs.
        if shortest_path[v][vertex] == l + 1:
            break
        for w in G.neighbors(node=vertex):
            if w not in seen:
                queue.append(w)
                seen.add(w)
                result += shortest_path[v][w]
    return result / (len(G) - 1)
Example #4
0
def kamada_kawai_layout(G,
                        dist=None,
                        pos=None,
                        weight="weight",
                        scale=1,
                        center=None,
                        dim=2):
    """Position nodes using Kamada-Kawai path-length cost-function.

    Parameters
    ----------
    G : graph or list of nodes
        A position will be assigned to every node in G.

    dist : dict (default=None)
        A two-level dictionary of optimal distances between nodes,
        indexed by source and destination node.
        If None, the distance is computed using shortest_path_length().

    pos : dict or None  optional (default=None)
        Initial positions for nodes as a dictionary with node as keys
        and values as a coordinate list or tuple.  If None, then use
        circular_layout() for dim >= 2 and a linear layout for dim == 1.

    weight : string or None   optional (default='weight')
        The edge attribute that holds the numerical value used for
        the edge weight.  If None, then all edge weights are 1.

    scale : number (default: 1)
        Scale factor for positions.

    center : array-like or None
        Coordinate pair around which to center the layout.

    dim : int
        Dimension of layout.

    Returns
    -------
    pos : dict
        A dictionary of positions keyed by node

    Raises
    ------
    ValueError
        If ``center`` is given and its length does not equal ``dim``.

    Examples
    --------
    >>> pos = eg.kamada_kawai_layout(G)
    """

    nNodes = len(G)
    if nNodes == 0:
        return {}

    # Validate `center` up front so a bad argument fails fast instead of
    # after the (expensive) Kamada-Kawai solve.
    if center is None:
        center = np.zeros(dim)
    else:
        center = np.asarray(center)
    if len(center) != dim:
        msg = "length of center coordinates must match dimension of layout"
        raise ValueError(msg)

    if dist is None:
        dist = dict(eg.Floyd(G))
    # Unknown / unreachable pairs default to a large constant distance.
    dist_mtx = 1e6 * np.ones((nNodes, nNodes))
    for row, nr in enumerate(G):
        if nr not in dist:
            continue
        rdist = dist[nr]
        for col, nc in enumerate(G):
            if nc in rdist:
                dist_mtx[row, col] = rdist[nc]

    if pos is None:
        if dim >= 3:
            pos = eg.random_position(G, dim=dim)
        elif dim == 2:
            pos = eg.circular_position(G)
        else:
            # 1-D: spread the nodes evenly on [0, 1].
            pos = {n: pt for n, pt in zip(G, np.linspace(0, 1, len(G)))}

    pos_arr = np.array([pos[n] for n in G])

    pos = _kamada_kawai_solve(dist_mtx, pos_arr, dim)

    pos = eg.rescale_position(pos, scale=scale) + center
    return dict(zip(G, pos))
Example #5
0
def HANP(G, m, delta, threshod=1, hier_open=0, combine_open=0):
    """Detect community by Hop attenuation & node preference algorithm

    Return the detected communities. But the result is random.

    Implement the basic HANP algorithm and give more freedom through the parameters, e.g., you can use threshod
    to set the condition for node updating. If network are known to be Hierarchical and overlapping communities,
    it's recommended to choose geodesic distance as the measure(instead of receiving the current hop scores
    from the neighborhood and carry out a subtraction) and When an equilibrium is reached, treat newly combined
    communities as a single node.

    For using Floyd to get the shortest distance, the time complexity is a little high.

    Parameters
    ----------
    G : graph
      A easygraph graph
    m : float
      Used to calculate score, when m > 0, more preference is given to node with more neighbors; m < 0, less
    delta : float
      Hop attenuation
    threshod : float
      Between 0 and 1, only update node whose number of neighbors sharing the maximal label is less than the threshod.
      e.g., threshod == 1 means updating all nodes.
    hier_open :
      1 means using geodesic distance as the score measure.
      0 means not.
    combine_open :
      this option is valid only when hier_open = 1
      1 means When an equilibrium is reached, treat newly combined communities as a single node.
      0 means not.

    Returns
    ----------
    communities : dictionary
      key: serial number of community , value: nodes in the community.

    Examples
    ----------
    >>> HANP(G,
    ...     m = 0.1,
    ...     delta = 0.05,
    ...     threshod = 1,
    ...     hier_open = 0,
    ...     combine_open = 0
    ...     )

    References
    ----------
    .. [1] Ian X. Y. Leung, Pan Hui, Pietro Liò, and Jon Crowcrof:
        Towards real-time community detection in large networks

    """
    nodes = list(G.nodes.keys())
    # Trivial case: a single node is its own community.
    if len(nodes) == 1:
        return {1: [nodes[0]]}
    label_dict = dict()        # node -> current label
    score_dict = dict()        # label -> current score (hop attenuation or geodesic)
    node_dict = dict()         # label -> representative node that introduced it
    Next_label_dict = dict()   # node -> label chosen during the current sweep
    cluster_community = dict()
    nodes = list(G.nodes.keys())
    degrees = G.degree()
    records = []               # history of community collapses (combine_open == 1)
    loop_count = 0
    i = 0
    old_score = 1
    # Keep a reference to the original graph: G itself may be replaced by a
    # collapsed super-node graph inside the loop when combine_open == 1.
    ori_G = G
    if hier_open == 1:
        # Geodesic-distance scoring needs all-pairs shortest paths up front.
        distance_dict = eg.Floyd(G)
    # Initialization: every node starts with its own unique label, score 1.
    for node in nodes:
        label_dict[node] = i
        score_dict[i] = 1
        node_dict[i] = node
        i = i + 1
    while True:
        loop_count += 1
        # Visit nodes in a fresh random order each sweep; this is why the
        # overall result is random.
        random.shuffle(nodes)
        score = 1
        for node in nodes:
            labels = SelectLabels_HANP(G, node, label_dict, score_dict,
                                       degrees, m, threshod)
            if labels == []:
                # No candidate labels (node not eligible for update): keep
                # its current label for the next sweep.
                Next_label_dict[node] = label_dict[node]
                continue
            old_label = label_dict[node]
            Next_label_dict[node] = random.choice(labels)
            # Asynchronous updates. If you want to use synchronous updates, comment the line below
            label_dict[node] = Next_label_dict[node]
            if hier_open == 1:
                # Hierarchical mode: score is the geodesic-distance measure;
                # track the sweep's minimum to detect equilibrium below.
                score_dict[Next_label_dict[node]] = UpdateScore_Hier(
                    G, node, label_dict, node_dict, distance_dict)
                score = min(score, score_dict[Next_label_dict[node]])
            else:
                # Hop attenuation: decay the score only when the label changed.
                if old_label == Next_label_dict[node]:
                    cdelta = 0
                else:
                    cdelta = delta
                score_dict[Next_label_dict[node]] = UpdateScore(
                    G, node, label_dict, score_dict, cdelta)
        if hier_open == 1 and combine_open == 1:
            # A large drop (> 1/3) of the minimal score signals an
            # equilibrium: collapse each community into one super-node and
            # continue on the reduced graph.
            if old_score - score > 1 / 3:
                old_score = score
                (
                    records,
                    G,
                    label_dict,
                    score_dict,
                    node_dict,
                    Next_label_dict,
                    nodes,
                    degrees,
                    distance_dict,
                ) = CombineNodes(
                    records,
                    G,
                    label_dict,
                    score_dict,
                    node_dict,
                    Next_label_dict,
                    nodes,
                    degrees,
                    distance_dict,
                )
        # NOTE: after this assignment label_dict and Next_label_dict alias
        # the same dict, so the per-node writes above keep both in sync.
        label_dict = Next_label_dict
        if (estimate_stop_cond_HANP(G, label_dict, score_dict, degrees, m,
                                    threshod) is True):
            break
        """As mentioned in the paper, it's suggested that the number of iterations
        required is independent to the number of nodes and that after
        five iterations, 95% of their nodes are already accurately clustered
        """
        # Safety cap on the number of sweeps (see the note above).
        if loop_count > 20:
            break
    print("After %d iterations, HANP complete." % loop_count)
    # Invert label_dict into {label: [nodes]} communities.
    for node in label_dict.keys():
        label = label_dict[node]
        if label not in cluster_community.keys():
            cluster_community[label] = [node]
        else:
            cluster_community[label].append(node)
    if hier_open == 1 and combine_open == 1:
        # Expand the collapsed super-nodes back into original-node communities.
        records.append(cluster_community)
        cluster_community = ShowRecord(records)
    # Split any community that is not connected in the ORIGINAL graph.
    result_community = CheckConnectivity(ori_G, cluster_community)
    return result_community