Example #1
    def _calculate(self, include: set, is_regression=False):
        self._features = {}

        for graph in nx.connected_component_subgraphs(self._gnx):  # removed in NetworkX 2.4; see Example #2
            if len(graph) < 2:
                self._features.update(zip(graph.nodes(), [0.] * len(graph)))
            else:
                self._features.update(zip(graph.nodes(), map(float, alg_connectivity.fiedler_vector(graph))))
Example #2
    def _calculate(self, include: set):
        self._features = {}

        for connected_component in nx.connected_components(self._gnx):
            graph = self._gnx.subgraph(connected_component)
            if len(graph) < 2:
                self._features.update(zip(graph.nodes(), [0.] * len(graph)))
            else:
                self._features.update(zip(graph.nodes(), map(float, alg_connectivity.fiedler_vector(graph))))
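Examples #1 and #2 differ only in how connected components are obtained: nx.connected_component_subgraphs was removed in NetworkX 2.4, and Example #2 is the modern equivalent. A minimal, self-contained sketch of the same per-component computation (the toy graph here is illustrative, not from the original class):

import networkx as nx
from networkx.linalg import algebraicconnectivity as alg_connectivity

gnx = nx.Graph([(0, 1), (1, 2), (3, 4)])  # two components
gnx.add_node(5)                           # plus an isolated node
features = {}
for component in nx.connected_components(gnx):
    graph = gnx.subgraph(component)
    if len(graph) < 2:
        # The Fiedler vector is undefined for singletons; fall back to 0
        features.update(zip(graph.nodes(), [0.] * len(graph)))
    else:
        features.update(zip(graph.nodes(), map(float, alg_connectivity.fiedler_vector(graph))))
print(features)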
Example #3
def fiedlerVector(gnx, f, ft):
    start = timer.start(ft, 'fiedler_vector')
    fiedlerVector = nx.fiedler_vector(gnx)
    timer.stop(ft, start)
    fiedlerMap = {}
    nodes = list(gnx.nodes())  # G.nodes() is not positionally indexable in NetworkX >= 2.0
    for i in range(len(fiedlerVector)):
        f.write(str(nodes[i]) + ',' + str(fiedlerVector[i]) + '\n')
        fiedlerMap[nodes[i]] = fiedlerVector[i]
    return fiedlerMap
Example #4
def _basic_partitioning(G, n1, n2):
    '''Return graph G divided into two parts of the specified sizes.'''
    '''n = len(G)

    if number_of_selfloops(G):
        raise nx.NetworkXNotImplemented("Graph with self-edges.")
    if is_weighted(G):
        raise nx.NetworkXNotImplemented("Weighted graph.")
    if is_directed(G):
        raise nx.NetworkXNotImplemented("Directed graph.")
    if is_empty(G):
        raise nx.NetworkXNotImplemented("Empty graph.")
    if not nx.is_connected(G):
        raise nx.NetworkXException("Non connected graph.")
    if n < 2:
        raise nx.NetworkXException("Too small graph.")
    if n1 + n2 != n:
        raise nx.NetworkXException("Invalid components.")'''

    # Prepare the vector from which the components will be extracted
    v2 = fiedler_vector(G)
    # print("v2: ", v2.shape, v2.dtype.name)
    # print("the fiedler vector is:\n", v2)
    # print("Sum of the Fiedler eigenvector's elements: ", v2.sum())  # Should be approximately 0
    mapped_v2 = get_mapped_vector(v2)
    # print("mapped_v2: ", mapped_v2.shape, mapped_v2.dtype.name)
    # print("the mapped array is:\n", mapped_v2)
    sorted_v2 = get_sorted_vector(mapped_v2)
    # print("the sorted array is:\n", sorted_v2)

    # Create and check the two candidate parts and their cut sets
    component_test1 = set(sorted_v2[:n1, 0].flat)
    component_test2 = set(sorted_v2[:n2, 0].flat)
    print("This is component_test1:\n", component_test1)
    print("This is component_test2:\n", component_test2)
    cut_size_1 = cut_size(G, component_test1)
    cut_size_2 = cut_size(G, component_test2)
    print("The first cut size is: ", cut_size_1)
    print("The second cut size is: ", cut_size_2)

    # Remove the graph edges that belong to the cut set
    if cut_size_1 < cut_size_2:
        component_final = component_test1
        H = graph_division(G, component_final)
        return H
    else:
        component_final = component_test2
        H = graph_division(G, component_final)
        return H
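get_mapped_vector, get_sorted_vector, and graph_division above are project-specific helpers. A self-contained sketch of the same bisection idea using only public NetworkX API (the function name and toy graph are illustrative):

import networkx as nx
from networkx.linalg.algebraicconnectivity import fiedler_vector

def spectral_bisect(G, n1, n2):
    """Split G into parts of sizes n1 and n2 along the Fiedler ordering."""
    assert n1 + n2 == len(G)
    order = [node for _, node in sorted(zip(fiedler_vector(G), G.nodes()))]
    part1, part2 = set(order[:n1]), set(order[:n2])
    # Keep whichever prefix induces the smaller cut; the complement of an
    # n1-prefix has size n2 and vice versa, so both candidates respect the sizes
    best = part1 if nx.cut_size(G, part1) < nx.cut_size(G, part2) else part2
    return best, set(G) - best

print(spectral_bisect(nx.path_graph(6), 2, 4))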
Example #5
def nx_fiedler_communities(M):
    nx_graph = get_undirected_nx_network(M)

    # Remove detected communities
    community_1 = ['Argentina', 'Venezuela']
    community_2 = ['Italy', 'France', 'Belgium', 'Germany', 'Spain', 'United States', 'Portugal', 'United Kingdom', 'Greece']
    for node in community_1 + community_2:
        nx_graph.remove_node(node)

    nodes = np.array(list(nx_graph.nodes()))
    vector = fiedler_vector(nx_graph, weight='weight')

    print "-----"
    print nodes[vector >= 0]
    print nodes[vector < 0]
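The sign pattern of the Fiedler vector is the classic two-way spectral split. The same idea in isolation, with a stock graph standing in for the trade network above:

import numpy as np
import networkx as nx
from networkx.linalg.algebraicconnectivity import fiedler_vector

g = nx.karate_club_graph()
nodes = np.array(list(g.nodes()))
vec = fiedler_vector(g)

print(nodes[vec >= 0])  # one community
print(nodes[vec < 0])   # the other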
Example #6
def nodefeat(g, fil, norm=False, **kwargs):
    """
    :param g:
    :param fil: filtration type: 'deg', 'cc', 'random', 'hop', 'fiedler', or 'ricci'
    :return: node feature (np.array of shape (n_node, 1))
    """
    # g = nx.random_geometric_graph(100, 0.2)
    assert nx.is_connected(g)

    if fil == 'deg':
        nodefeat = np.array(list(dict(nx.degree(g)).values())).reshape(
            len(g), 1)
    elif fil == 'cc':
        nodefeat = np.array(list(nx.closeness_centrality(g).values()))
        nodefeat = nodefeat.reshape(len(g), 1)
    elif fil == 'random':
        nodefeat = np.random.random((len(g), 1))
    elif fil == 'hop':
        base = kwargs['base']
        assert type(base) == int
        length = nx.single_source_dijkstra_path_length(g, base)  # dict #
        nodefeat = [length[i] for i in range(len(g))]
        nodefeat = np.array(nodefeat).reshape(len(g), 1)
    elif fil == 'fiedler':
        nodefeat = fiedler_vector(g, normalized=False)  # np.ndarray
        nodefeat = nodefeat.reshape(len(g), 1)
    elif fil == 'ricci':
        g = ricciCurvature(g, alpha=0.5, weight='weight')
        ricci_dict = nx.get_node_attributes(g, 'ricciCurvature')
        ricci_list = [ricci_dict[i] for i in range(len(g))]
        nodefeat = np.array(ricci_list).reshape((len(g), 1))
    else:
        raise Exception('No such filtration: %s' % fil)
    assert nodefeat.shape == (len(g), 1)

    # normalize
    if norm: nodefeat = nodefeat / float(max(abs(nodefeat)))
    return nodefeat
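A minimal, hypothetical call, assuming a connected graph with consecutive integer node labels (which the 'hop' and 'ricci' branches index by); karate_club_graph is just a convenient stand-in:

import networkx as nx

g = nx.karate_club_graph()                 # connected, nodes 0..33
feat = nodefeat(g, 'fiedler', norm=True)   # shape (34, 1), scaled so max |value| is 1
hops = nodefeat(g, 'hop', base=0)          # path lengths from node 0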
Example #7
def forced_split_communities_qds(adj, c, cluster_size, normalize,
                                 evd_method, tolerence, seed):
    """Force-splits any community in the graph whose size exceeds the
       threshold, choosing the split that least compromises
       modularity density.

    Parameters
    ----------
    adj : SciPy sparse matrix (csr or csc)
        The N x N Adjacency matrix of the graph.
    c : Integer array
        Current array of community labels for the nodes in the graph as
        ordered by the adjacency matrix.
    cluster_size : integer
        Threshold/maximum size (number of nodes) of a cluster.
    normalize : bool
        Whether the normalized Laplacian matrix is used.
    evd_method : string
        Method of eigenvalue computation. It should be one of 'tracemin'
        (TraceMIN), 'lanczos' (Lanczos iteration) and 'lobpcg' (LOBPCG).
    tolerence : float
        Tolerance of relative residual in eigenvalue computation.
    seed : integer, random_state, or None
        Indicator of random number generation state.

    Returns
    -------
    Integer array
        Array of community labels, as a result of splitting, for the nodes
        in the graph as ordered by the adjacency matrix.

    """

    # Array of unique community labels
    unique_clusters = np.unique(c)

    # Tracks the nodes in each community
    dict_bool = {}

    # Tracks the clusters that are connected to each community
    dict_connected = {}

    for label in unique_clusters:
        # Track the nodes in each community
        dict_bool[label] = (c == label)

        # Initialize each key to an empty set
        dict_connected[label] = set()

    # Track the clusters that are connected to each community
    for comm1 in unique_clusters[:-1]:
        # index of the community 'comm1'
        i = np.where(unique_clusters == comm1)[0][0]
        bool_1 = dict_bool[comm1]
        adj_comm1 = adj[bool_1]

        # Track the clusters that are connected to community 'comm1'
        for comm2 in unique_clusters[i+1:]:
            bool_2 = dict_bool[comm2]
            zero = np.zeros(len(c), dtype=int)
            zero[bool_2] = 1

            # Check if 'comm2' is connected to 'comm1'
            if ((adj_comm1.dot(zero)).sum()) != 0:
                dict_connected[comm1].add(comm2)
                dict_connected[comm2].add(comm1)

    # Create a copy of cluster labels
    c_new = c.copy()

    # Split each community, whose size is greater than the threshold
    for cluster_num in unique_clusters:

        bool_r = dict_bool[cluster_num]

        # Sparse adjacency matrix corresponding to 'cluster_num'
        sub_adj = adj[bool_r].T[bool_r]

        # Subgraph constructed from sparse adjacency matrix of 'cluster_num'
        g = nx.from_scipy_sparse_matrix(sub_adj)  # renamed from_scipy_sparse_array in NetworkX 3.0
        # Number of nodes in 'g'
        len_g = len(g)

        # Don't consider further splitting singleton communities
        # or communities of size lower than the threshold
        # or a community which has disconnected modules
        if ((len_g == 1) | (len_g <= cluster_size) |
           (not(nx.is_connected(g)))):

            if not nx.is_connected(g):
                print("Warning: check your data, as an earlier iteration "
                      "resulted in a cluster with internal "
                      "disconnected components")
            continue
        else:

            # Create an array of community labels for the
            # nodes in 'cluster_num'
            c_sub = np.zeros(len_g, dtype=int)

            # indices of the nodes in 'sub_adj'
            sub_index = np.arange(len_g)

            # Determine the fiedler_vector of subgraph 'g'
            f_vector = fiedler_vector(g, weight='weight', normalized=normalize,
                                      tol=tolerence,
                                      method=evd_method, seed=seed)

            # Rearrange the nodes of 'sub_adj' in decreasing order of the
            # elements of the Fiedler vector
            nodeIds = [i for f_vector, i in sorted(zip(f_vector, sub_index),
                                                   reverse=True)]

            # Initialize the communities corresponding to
            # bipartitioning of 'cluster_num'
            first_community = []
            second_community = []
            second_community.extend(nodeIds)

            # Records the splitting information
            split_info = {}

            # Create a copy of the latest cluster labels
            c_latest = c_new.copy()

            # Create a copy of 'dict_bool'
            dict_bool_copy = dict_bool.copy()

            # Possible splits of 'cluster_num' based on the Fiedler vector
            for j in range(len(nodeIds)-1):

                # Split the 'cluster_num' into two clusters
                first_community.append(nodeIds[j])
                second_community.remove(nodeIds[j])

                # Graph induced by nodes in 'first_community'
                g1 = g.subgraph(first_community)

                # Graph induced by nodes in 'second_community'
                g2 = g.subgraph(second_community)

                # Check if 'g1' and 'g2' are connected graphs each
                if(nx.is_connected(g1) & nx.is_connected(g2)):
                    # Relabel the cluster labels of nodes in 'cluster_num'
                    c_sub[first_community] = cluster_num
                    new_label = max(c_new) + 1
                    c_sub[second_community] = new_label

                    # Array of the union of connected clusters of the
                    # split communities of 'cluster_num'
                    conn_clusters = \
                        np.array(list(((dict_connected[cluster_num]) |
                                 set([cluster_num, new_label]))))

                    # Update the cluster labels in 'c_latest'
                    c_latest[bool_r] = c_sub

                    # Update the boolean array of the split communities
                    # of 'cluster_num'
                    dict_bool_copy[cluster_num] = (c_latest == cluster_num)
                    dict_bool_copy[new_label] = (c_latest == new_label)

                    # Calculate the modularity density after
                    # splitting 'cluster_num'
                    div_metric = modularity_density(adj,
                                                    c_latest,
                                                    np.unique(c_sub[0:]),
                                                    dict_bool_copy,
                                                    conn_clusters)

                    # Record the split
                    split_info[div_metric] = j

            # Delete to save memory
            del c_latest
            del dict_bool_copy

            # Check if at least one instance of splitting 'cluster_num' exists
            # that does not result in disconnected modules
            if len(split_info) > 0:
                # Split 'cluster_num' based on the division that
                # least compromises modularity density
                best_split = split_info[max(split_info.keys())]
                c_sub[nodeIds[0:best_split+1]] = cluster_num
                c_sub[nodeIds[best_split+1:]] = max(c_new) + 1

                # Update 'c_new' with new community labels as a
                # result of splitting 'cluster_num'
                c_new[bool_r] = c_sub
            else:
                print("No split possible for cluster num: {}, \
                 as any further split results in disconnected modules".
                      format(cluster_num))

    # Array of community labels, as a result of splitting, for the nodes
    # in the graph as ordered by the adjacency matrix
    return c_new
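A hedged usage sketch: modularity_density is a helper from the same module and is not shown above, so this runs only in that context; the graph and parameter values are toy choices.

import numpy as np
import networkx as nx

G = nx.barbell_graph(5, 0)                        # two 5-cliques joined by a single edge
adj = nx.to_scipy_sparse_matrix(G, format='csr')  # renamed to_scipy_sparse_array in NetworkX 3.0
c = np.zeros(len(G), dtype=int)                   # start from one community
labels = forced_split_communities_qds(adj, c, cluster_size=5, normalize=False,
                                      evd_method='lanczos', tolerence=1e-8, seed=42)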
Example #8
def nodefeat(g, fil, norm=False, **kwargs):
    """
    :param g:
    :param fil: filtration type: 'deg', 'cc', 'cc_w', 'random', 'hop', 'fiedler',
                'fiedler_w', 'fiedler_s', 'ricci', 'ricci_w', or 'hks_<t>'
    :return: node feature (np.array of shape (n_node, 1))
    """
    # g = nx.random_geometric_graph(100, 0.2)
    t0 = time.time()
    assert nx.is_connected(g)

    if fil == 'deg':
        nodefeat = np.array(list(dict(nx.degree(g)).values())).reshape(
            len(g), 1)
    elif fil == 'cc':
        nodefeat = np.array(list(nx.closeness_centrality(g).values()))
        nodefeat = nodefeat.reshape(len(g), 1)
    elif fil == 'cc_w':
        nodefeat = np.array(
            list(nx.closeness_centrality(g, distance='dist').values()))
        nodefeat = nodefeat.reshape(len(g), 1)
    elif fil == 'random':
        nodefeat = np.random.random((len(g), 1))
    elif fil == 'hop':
        base = kwargs['base']
        assert type(base) == int
        length = nx.single_source_dijkstra_path_length(g, base)  # dict #
        nodefeat = [length[i] for i in range(len(g))]
        nodefeat = np.array(nodefeat).reshape(len(g), 1)

    elif fil == 'fiedler':
        if len(g.edges) == 2 * len(g):
            # todo: hack here. fiedler is very slow when n_edges = 2 * n_nodes
            nodefeat = np.array(list(dict(nx.degree(g)).values())).reshape(
                len(g), 1)
        else:
            nodefeat = fiedler_vector(g, normalized=False)  # np.ndarray
            nodefeat = nodefeat.reshape(len(g), 1)

    elif fil == 'fiedler_w':
        if False:  # len(g.edges) == 2 * len(g):  # todo: hack here. fiedler is very slow when n_edges = 2 * n_nodes
            nodefeat = np.array(list(dict(nx.degree(g)).values())).reshape(
                len(g), 1)
        else:
            for u, v in g.edges():
                try:
                    assert 'dist' in g[u][v].keys()
                    g[u][v]['dist'] += 1e-6
                except AssertionError:
                    pass
                    # print(f'g[{u}][{v}] = {g[u][v]}')
            print(f'bottleneck graph {len(g)}/{len(g.edges())}')
            # for line in nx.generate_edgelist(g):
            #     print(line)
            print('-' * 50)
            nodefeat = fiedler_vector(g,
                                      normalized=False,
                                      weight='dist',
                                      method='tracemin_lu')  # np.ndarray
            print('after true fiedler')
            nodefeat = nodefeat.reshape(len(g), 1)

    elif fil == 'fiedler_s':
        nodefeat = fiedler_vector(g, normalized=False)  # np.ndarray
        nodefeat = nodefeat.reshape(len(g), 1)
        nodefeat = np.multiply(nodefeat, nodefeat)

    elif fil == 'ricci':
        try:
            g = ricciCurvature(g, alpha=0.5, weight='weight')
            ricci_dict = nx.get_node_attributes(g, 'ricciCurvature')
            ricci_list = [ricci_dict[i] for i in range(len(g))]
            nodefeat = np.array(ricci_list).reshape((len(g), 1))
        except Exception:
            nodefeat = np.random.random(
                (len(g), 1)
            )  # cvxpy.error.SolverError: Solver 'ECOS' failed. Try another solver.
    elif fil[:3] == 'hks':
        assert fil[3] == '_'
        t = float(fil[4:])
        from Esme.dgms.hks import hks
        nodefeat = hks(g, t)

    elif fil == 'ricci_w':
        try:
            g = ricciCurvature(g, alpha=0.5, weight='dist')
            ricci_dict = nx.get_node_attributes(g, 'ricciCurvature')
            ricci_list = [ricci_dict[i] for i in range(len(g))]
            nodefeat = np.array(ricci_list).reshape((len(g), 1))
        except Exception:
            nodefeat = np.random.random(
                (len(g), 1)
            )  # cvxpy.error.SolverError: Solver 'ECOS' failed. Try another solver.

    else:
        raise Exception('No such filtration: %s' % fil)
    assert nodefeat.shape == (len(g), 1)

    # normalize
    if norm: nodefeat = nodefeat / float(max(abs(nodefeat)))
    if time.time() - t0 > 3:
        from Esme.helper.time import precision_format
        print(
            f'nodefeat takes {precision_format(time.time()-t0, 2)} for g {len(g)}/{len(g.edges)}'
        )
        from Esme.viz.graph import viz_graph
        # viz_graph(g, show=True)
    return nodefeat
Example #9
def split_communities_mqds(adj, c, normalize, evd_method, tolerence, seed):
    """Splits the communities in the graph if the splitting
       improves modularity density.

    Parameters
    ----------
    adj : SciPy sparse matrix (csr or csc)
        The N x N Adjacency matrix of the graph.
    c : Integer array
        Current array of community labels for the nodes in the graph as
        ordered by the adjacency matrix.
    normalize : bool
        Whether the normalized Laplacian matrix is used.
    evd_method : string
        Method of eigenvalue computation. It should be one of 'tracemin'
        (TraceMIN), 'lanczos' (Lanczos iteration) and 'lobpcg' (LOBPCG).
    tolerence : float
        Tolerance of relative residual in eigenvalue computation.
    seed : integer, random_state, or None
        Indicator of random number generation state.

    Returns
    -------
    Integer array
        Array of community labels, as a result of splitting, for the nodes
        in the graph as ordered by the adjacency matrix.

    """

    unique_clusters = np.unique(c)
    dict_bool = {}
    curr_modularity = mula_modularity_density(adj, c)
    curr_c = c.copy()
    split_info = []
    split = False

    for label in unique_clusters:
        # Track the nodes in each community
        dict_bool[label] = (c == label)

    for cluster_num in unique_clusters:
        bool_r = dict_bool[cluster_num]
        sub_adj = adj[bool_r].T[bool_r]
        g = nx.from_scipy_sparse_matrix(sub_adj)  # renamed from_scipy_sparse_array in NetworkX 3.0
        connected = nx.is_connected(g)
        len_g = sub_adj.shape[0]

        if len_g == 1:
            continue
        elif not connected:
            print("Warning: Check your data as an earliar iteration \
                      resulted in a cluster with \
                      internal disconnected components")
            continue

        f_vector = fiedler_vector(g, weight='weight', normalized=normalize,
                                  tol=tolerence, method=evd_method, seed=seed)

        sub_index = np.arange(len_g)
        nodeIds = [i for f_vector, i in sorted(zip(f_vector, sub_index),
                                               reverse=False,
                                               key=lambda x: x[0])]

        first_community = []
        second_community = []
        second_community.extend(nodeIds)
        c_sub = np.zeros(len_g, dtype=int)
        dict_bool_copy = dict_bool.copy()

        for idx in range(len_g-1):
            first_community.append(second_community.pop())
            g1 = g.subgraph(first_community)
            g2 = g.subgraph(second_community)

            if(nx.is_connected(g1) & nx.is_connected(g2)):
                c_sub[first_community] = cluster_num
                new_label = max(curr_c) + 1
                c_sub[second_community] = new_label

                scratch_c = c.copy()
                scratch_c[bool_r] = c_sub

                split_value = mula_modularity_density(adj, scratch_c)

                if split_value > curr_modularity:
                    split_info.append((split_value, scratch_c))

        if len(split_info) > 0:
            split = True
            curr_c = max(split_info, key=lambda x: x[0])[1]

    return split, curr_c
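A hedged sketch of the calling convention for this variant; mula_modularity_density is again an unshown helper from the same module. Note the (split, labels) return tuple:

import numpy as np
import networkx as nx

G = nx.barbell_graph(5, 0)
adj = nx.to_scipy_sparse_matrix(G, format='csr')
c = np.zeros(len(G), dtype=int)
did_split, labels = split_communities_mqds(adj, c, normalize=False,
                                           evd_method='lanczos',
                                           tolerence=1e-8, seed=0)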
Example #10

FILENAME = "soc-Epinions1.txt"
with open("graphs_processed/" + FILENAME) as f:
    first_line = f.readline().split()
k = first_line[4]  # number of clusters, taken from the header line

start_time = time.time()

G = nx.read_edgelist("graphs_processed/" + FILENAME)

vec = algebraicconnectivity.fiedler_vector(G, method="tracemin_lu")
vec = np.asarray(vec).reshape(-1, 1)
clusters = cluster.KMeans(int(k)).fit_predict(vec)

cost = 0
nodes = np.asarray(list(G.nodes()))

for i in range(int(k)):
    size = sum(clusters == i)
    print(size)
    cost += algo.cut_size(G, nodes[clusters == i]) / size

print("Cost: ", cost)
print("--- %s seconds ---" % (time.time() - start_time))

f = open("results/" + FILENAME, "w+")
Example #11
def split_communities_q(adj, c, split_track, merge_track, r, normalize,
                        evd_method, tolerence, seed):
    """Splits the communities in the graph if the splitting improves modularity.

    Parameters
    ----------
    adj : SciPy sparse matrix (csr or csc)
        The N x N Adjacency matrix of the graph.
    c : Integer array
        Current array of community labels for the nodes in the graph as ordered
        by the adjacency matrix.
    split_track : dictionary
        Tracks the communities fit for splitting; contains cluster labels as
        dictionary keys, and corresponding binary values (0 or 1) as values;
        1 indicates the community is fit for splitting,
        0 indicates the community is not fit for splitting.
    merge_track : dictionary
        Tracks the communities fit for merging; contains cluster labels as
        dictionary keys, and corresponding binary values (0 or 1) as values;
        1 indicates the community is fit for merging,
        0 indicates the community is not fit for merging.
    r : float
        Resolution of the topology: smaller 'r' favors forming larger
        communities, while larger 'r' favors forming smaller communities.
    normalize : bool
        Whether the normalized Laplacian matrix is used.
    evd_method : string
        Method of eigenvalue computation. It should be one of 'tracemin'
        (TraceMIN), 'lanczos' (Lanczos iteration) and 'lobpcg' (LOBPCG).
    tolerence : float
        Tolerance of relative residual in eigenvalue computation.
    seed : integer, random_state, or None
        Indicator of random number generation state.

    Returns
    -------
    tuple
        Tuple of the array of community labels of the nodes (as a result of
        merging), updated 'split_track' and 'merge_track'.

    """

    # Array of unique cluster labels
    unique_clusters = np.unique(c)

    # Tracks the nodes in each community
    dict_bool = {}

    for label in unique_clusters:
        # Track the nodes in each community
        dict_bool[label] = (c == label)

    # Determine the contribution of each community to modularity
    comm_metric = np.array([
        modularity_r(adj, c, [cluster_num], r, dict_bool)
        for cluster_num in unique_clusters
    ])

    # Create a copy of cluster labels
    c_new = c.copy()

    # Create a copy of 'split_track' and 'merge_track'
    split_change = split_track.copy()
    merge_change = merge_track.copy()

    # Split each community further if it improves modularity
    for cluster_num in unique_clusters:

        bool_r = dict_bool[cluster_num]

        # Sparse adjacency matrix corresponding to 'cluster_num'
        sub_adj = adj[bool_r].T[bool_r]

        # Subgraph constructed from sparse adjacency matrix of 'cluster_num'
        g = nx.from_scipy_sparse_matrix(sub_adj)  # renamed from_scipy_sparse_array in NetworkX 3.0
        # Number of nodes in 'g'
        len_g = len(g)

        # Don't consider further splitting singleton communities or a community
        # which has disconnected modules or
        # a community which is not fit for splitting
        if ((len_g == 1) | (not (nx.is_connected(g))) |
            (split_change[cluster_num] != 1)):
            if not nx.is_connected(g):
                print("Warning: check your data, as an earlier iteration "
                      "resulted in a cluster with internal "
                      "disconnected components")
            continue
        else:

            # Create an array of community labels for nodes in 'cluster_num'
            c_sub = np.zeros(len_g, dtype=int)

            # indices of the nodes in 'sub_adj'
            sub_index = np.arange(len_g)

            # Determine the fiedler_vector of subgraph 'g'
            f_vector = fiedler_vector(g,
                                      weight='weight',
                                      normalized=normalize,
                                      tol=tolerence,
                                      method=evd_method,
                                      seed=seed)

            # Rearrange the nodes of 'sub_adj' in decreasing order of the
            # elements of the Fiedler vector
            nodeIds = [
                i for f_vector, i in sorted(zip(f_vector, sub_index),
                                            reverse=True)
            ]

            # Initialize the communities corresponding to bipartitioning of
            # 'cluster_num'
            first_community = []
            second_community = []
            second_community.extend(nodeIds)

            # Modularity metric value for 'cluster_num'
            curr_metric = comm_metric[unique_clusters == cluster_num][0]

            # Records the splitting information
            split_info = {}

            # Create a copy of the latest cluster labels
            c_latest = c_new.copy()

            # Possible splits of 'cluster_num' based on the Fiedler vector
            for j in range(len(nodeIds) - 1):

                # Split the 'cluster_num' into two clusters
                first_community.append(nodeIds[j])
                second_community.remove(nodeIds[j])

                # Graph induced by nodes in 'first_community'
                g1 = g.subgraph(first_community)

                # Graph induced by nodes in 'second_community'
                g2 = g.subgraph(second_community)

                # Check if 'g1' and 'g2' are connected graphs each
                if (nx.is_connected(g1) & nx.is_connected(g2)):
                    # Relabel the cluster labels of nodes in 'cluster_num'
                    c_sub[first_community] = cluster_num
                    new_label = max(c_new) + 1
                    c_sub[second_community] = new_label

                    # Update the cluster labels in 'c_latest'
                    c_latest[bool_r] = c_sub

                    # Tracks the nodes in each of the split communities
                    # of 'cluster_num'
                    dict_bool_copy = dict()
                    dict_bool_copy[cluster_num] = (c_latest == cluster_num)
                    dict_bool_copy[new_label] = (c_latest == new_label)

                    # Calculate the difference in modularity for
                    # splitting 'cluster_num'
                    div_metric = (
                        modularity_r(adj, c_latest, np.unique(c_sub[0:]), r,
                                     dict_bool_copy) - curr_metric)

                    # Record the split only if it improves the modularity
                    if div_metric > 0:
                        split_info[div_metric] = j

                    # Delete to save memory
                    del dict_bool_copy

            # Delete to save memory
            del c_latest

            # Check if at least one instance of splitting 'cluster_num' exists
            # that improves modularity
            if len(split_info) > 0:
                # Split 'cluster_num' based on the division that
                # best improves modularity
                best_split = split_info[max(split_info.keys())]
                c_sub[nodeIds[0:best_split + 1]] = cluster_num
                new_label = max(c_new) + 1
                c_sub[nodeIds[best_split + 1:]] = new_label

                # Update 'c_new' with new community labels as a result of
                # splitting 'cluster_num'
                c_new[bool_r] = c_sub

                # Update the dictionary key-value pair, as the
                # community 'cluster_num' split into two communities
                split_change[cluster_num] = 1
                split_change[new_label] = 1
                merge_change[cluster_num] = 1
                merge_change[new_label] = 1
            else:
                # Set the dictionary value to 0 for the
                # key 'cluster_num' that did not split
                split_change[cluster_num] = 0

    # Resultant integer array of community labels of the
    # nodes (as a result of splitting), updated 'split_change'
    # and updated 'merge_change'
    return (c_new, split_change, merge_change)
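A hedged call sketch for this variant (modularity_r is an unshown helper from the same module); the trackers initially mark every community as fit for splitting and merging:

import numpy as np
import networkx as nx

G = nx.barbell_graph(5, 0)
adj = nx.to_scipy_sparse_matrix(G, format='csr')
c = np.zeros(len(G), dtype=int)
split_track = {label: 1 for label in np.unique(c)}
merge_track = {label: 1 for label in np.unique(c)}
c_new, split_track, merge_track = split_communities_q(
    adj, c, split_track, merge_track, r=0.5, normalize=False,
    evd_method='lanczos', tolerence=1e-8, seed=0)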
Example #12
    def _calculate_dep(self, include: set):
        # Deprecated variant: computes the Fiedler vector on the whole graph
        # at once rather than per connected component
        self._features = dict(zip(self._gnx, alg_connectivity.fiedler_vector(self._gnx)))
Example #13
""" sbm graph classification """

from Esme.dgms.fil import nodefeat
from Esme.graph.function import fil_strategy
from Esme.graph.generativemodel import sbms
from networkx.linalg.algebraicconnectivity import fiedler_vector

if __name__ == '__main__':
    n = 1
    p, q = 0.5, 0.1
    gs = sbms(n=n, n1=100, n2=50, p=p, q=q)
    for i in range(len(gs)):
        g = gs[i]
        # lapfeat = nodefeat(g, 'fiedler', norm=True)
        lapfeat = fiedler_vector(g, normalized=False)  # np.ndarray; renamed to avoid shadowing the imported nodefeat
        lapfeat = lapfeat.reshape(len(g), 1)

        gs[i] = fil_strategy(g, lapfeat, method='node', viz_flag=False)

    print('Finish computing lapfeat')
Example #14
def function_basis(g,
                   allowed,
                   norm_flag='no',
                   recomputation_flag=False,
                   transformation_flag=True):
    """

    :param g: nx graph
    :param allowed: filtration type, allowed = ['ricci', 'deg', 'hop', 'cc', 'fiedler']
    :param norm_flag: normalization flag
    :param recomputation_flag: if True, recompute features even when they already exist on the graph
    :param transformation_flag: if apply linear/nonlinear transformation of filtration function
    :return: g with ricci, deg, hop, cc, fiedler computed
    """

    # To save recomputation, look at the existing features first and only compute the missing ones.
    assert nx.is_connected(g)
    if len(g) < 3: return
    existing_features = list(g.node[list(g.nodes())[0]].keys())

    if not recomputation_flag:
        allowed = [
            feature for feature in allowed if feature not in existing_features
        ]

    def norm(g_, key, flag=norm_flag):
        if flag == 'no': return 1
        elif flag == 'yes':
            return np.max(np.abs(list(
                nx.get_node_attributes(g_, key).values()))) + 1e-6
        else:
            raise ValueError('norm_flag must be "yes" or "no"')

    # ricci
    g_ricci = g
    if 'ricciCurvature' in allowed:
        try:
            g_ricci = ricciCurvature(g, alpha=0.5, weight='weight')
            assert g_ricci.node.keys() == list(g.nodes())
            ricci_norm = norm(g, 'ricciCurvature', norm_flag)
            for n_ in g_ricci.nodes():
                g_ricci.node[n_]['ricciCurvature'] /= ricci_norm
        except:
            print('RicciCurvature Error for graph, set 0 for all nodes')
            for n in g_ricci.nodes():
                g_ricci.node[n]['ricciCurvature'] = 0

    # degree
    if 'deg' in allowed:
        deg_dict = dict(nx.degree(g_ricci))
        for n in g_ricci.nodes():
            g_ricci.node[n]['deg'] = deg_dict[n]
        deg_norm = norm(g_ricci, 'deg', norm_flag)
        for n in g_ricci.nodes():
            g_ricci.node[n]['deg'] /= float(deg_norm)  # np.float was removed in NumPy 1.24

    # hop
    if 'hop' in allowed:
        distance = nx.floyd_warshall_numpy(g)  # return a matrix
        distance = np.array(distance)
        distance = distance.astype(int)
        if norm_flag == 'no': hop_norm = 1
        elif norm_flag == 'yes': hop_norm = np.max(distance)
        else: raise Exception('norm flag has to be yes or no')
        for n in g_ricci.nodes():
            # if g_ricci has non consecutive nodes, n_idx is the index of hop distance matrix
            n_idx = list(g_ricci.nodes).index(n)
            assert n_idx <= len(g_ricci)
            # print(n, n_idx)
            g_ricci.node[n]['hop'] = distance[n_idx][:] / float(hop_norm)

    # closeness_centrality
    if 'cc' in allowed:
        cc = nx.closeness_centrality(g)  # dict
        cc = {k: v / min(cc.values())
              for k, v in cc.items()}  # no normalization for debug use
        cc = {k: 1.0 / v for k, v in cc.items()}
        for n in g_ricci.nodes():
            g_ricci.node[n]['cc'] = cc[n]

    # fiedler
    if 'fiedler' in allowed:
        fiedler = fiedler_vector(g, normalized=False)  # np.ndarray
        assert max(fiedler) > 0
        fiedler = fiedler / max(np.abs(fiedler))
        assert max(np.abs(fiedler)) == 1
        for n in g_ricci.nodes():
            n_idx = list(g_ricci.nodes).index(n)
            g_ricci.node[n]['fiedler'] = fiedler[n_idx]

    any_node = list(g_ricci.node)[0]
    if 'label' not in g_ricci.node[any_node].keys():
        for n in g_ricci.nodes():
            g_ricci.node[n]['label'] = 0  # add dummy
    else:  # contains label key
        assert 'label' in g_ricci.node[any_node].keys()
        for n in g_ricci.nodes():
            label_norm = 40
            if graph == 'dd_test':  # 'graph' is assumed to be a module-level dataset name
                label_norm = 90
            g_ricci.node[n]['label'] /= float(label_norm)

    if 'deg' in allowed:
        for n in g_ricci.nodes():
            attribute_mean(g_ricci, n, key='deg', cutoff=1, iteration=0)

        # better normalization, used to include 1_0_deg_std/ deleted now:
        if norm_flag == 'yes':
            for attr in ['1_0_deg_sum']:
                norm_ = norm(g_ricci, attr, norm_flag)
                for n in g_ricci.nodes():
                    g_ricci.node[n][attr] = g_ricci.node[n][attr] / float(
                        norm_)

    if 'label' in allowed:
        for n in g_ricci.nodes():
            attribute_mean(g_ricci, n, key='label', cutoff=1, iteration=0)
        for n in g_ricci.nodes():
            attribute_mean(g_ricci, n, key='label', cutoff=1, iteration=1)

    if 'cc_min' in allowed:
        for n in g_ricci.nodes():
            attribute_mean(g_ricci, n, key='cc')

    if 'ricciCurvature_min' in allowed:
        for n in g_ricci.nodes():
            attribute_mean(g_ricci, n, key='ricciCurvature')

    return g_ricci
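Portability note: Example #14 is written against NetworkX 1.x / Python 2 idioms. On NetworkX >= 2.4 the g.node attribute view is gone, so the per-node writes above would use g.nodes instead, e.g.:

# Fragment only; g_ricci and deg_dict as in the function body above
for n in g_ricci.nodes():
    g_ricci.nodes[n]['deg'] = deg_dict[n]   # g.nodes[...] replaces g.node[...]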
Example #15
        "4 5 {'dist': 0.9401972}"
    ]
    g = nx.parse_edgelist(lines, nodetype=int)
    print(g.edges(data=True))
    # v_weight = fiedler_vector(g, normalized=False, weight='dist')  # np.ndarray
    # v_weight = list(nx.closeness_centrality(g, distance='dist').values())

    g = ricciCurvature(g, alpha=0.5, weight='dist')
    ricci_dict = nx.get_node_attributes(g, 'ricciCurvature')
    v_weight = [ricci_dict[i] for i in range(len(g))]

    print(v_weight)
    sys.exit()

    # g = nx.circulant_graph(10, offsets=[1]*10)
    w_name = 'weightd'
    random.seed(43)
    g = nx.random_tree(20, seed=42)
    for u, v in g.edges():
        g[u][v][w_name] = random.random()
        # print(g[u][v])
    print(g.edges)

    v_noweight = fiedler_vector(g, normalized=False)  # np.ndarray
    v_weight = fiedler_vector(g, normalized=False, weight=w_name)  # np.ndarray
    v_fake_weight = fiedler_vector(g, normalized=False,
                                   weight='abcdefg')  # np.ndarray
    print('no weight', v_noweight)
    print('weight', v_weight)
    print('fake weight', v_fake_weight)
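Since 'abcdefg' is not an attribute on any edge, NetworkX falls back to weight 1 for every edge, so v_fake_weight should match v_noweight up to sign and solver tolerance. A quick check (a sketch, assuming numpy is available):

import numpy as np
print(np.allclose(np.abs(v_noweight), np.abs(v_fake_weight), atol=1e-5))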