Example #1
def calc_cluster_coefficient(hcc, V):
    # check max weight first (random groups may have a max of 0)
    max_w = max([w for u, v, w in hcc.edges(data='weight')])
    if max_w == 0:
        return 0
    else:
        return sum(clustering(hcc, weight='weight').values()) / float(len(V))  #average_clustering(hcc)
def get_topological_features(G, nodes=None):
    N_ = len(G.nodes)
    if nodes is None:
        nodes = G.nodes
    # Degree centrality
    d_c = get_features(degree_centrality(G).values())
    print('a')
    # Betweenness centrality
    b_c = get_features(betweenness_centrality(G).values())
    print('b')

    # Closeness centrality
    c_c = get_features(closeness_centrality(G).values())
    print('c')
    # Clustering
    c = get_features(clustering(G).values())
    print('d')

    d = diameter(G)
    r = radius(G)

    s_p_average = []
    for s in shortest_path_length(G):
        dic = s[1]
        lengths = dic.values()
        s_p_average += [sum(lengths) / float(N_)]

    s_p_average = get_features(s_p_average)

    features = np.concatenate((d_c, b_c, c_c, c, s_p_average, [d], [r]),
                              axis=0)

    return features
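A minimal usage sketch for calc_cluster_coefficient (hypothetical setup; any graph with at least one weighted edge works, and get_topological_features additionally assumes a get_features() helper, not shown here, that reduces a list of per-node values to summary statistics):

import networkx as nx
from networkx import clustering

G = nx.erdos_renyi_graph(50, 0.1, seed=1)
nx.set_edge_attributes(G, 1.0, 'weight')
print(calc_cluster_coefficient(G, list(G.nodes)))  # average weighted clustering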
Example #3
def clustering_coefficient_and_path_length(G):
    # mean local clustering coefficient over all nodes
    clustering_values = clustering(G)
    clustering_values = np.mean(
        [abs(value) for value in clustering_values.values()])
    # average shortest path length, computed per connected component
    path_length = []
    for C in (G.subgraph(c).copy() for c in nx.connected_components(G)):
        path_length.append(average_shortest_path_length(C))
    path_length = np.mean(path_length)
    return clustering_values, path_length
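A quick sketch of why the per-component loop matters: average_shortest_path_length raises an exception on a disconnected graph, so the function averages it over connected components instead (assumes the snippet's imports: numpy as np, networkx as nx, clustering, average_shortest_path_length):

G = nx.union(nx.cycle_graph(5), nx.path_graph(4), rename=('a-', 'b-'))  # two components
cc, apl = clustering_coefficient_and_path_length(G)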
Example #4
def attack_degree(graph, nodes_number, iteration_removals):
    after_removals = []
    length_range = int((nodes_number - iteration_removals)/iteration_removals)
    degrees = list(graph.degree())
    degrees.sort(key=lambda x: x[1], reverse=True)
    first_element = first_tuple_element(degrees)
    degrees_reshaped = np.reshape(
        first_element, (iteration_removals, int(nodes_number/iteration_removals))).T

    for iterator in tqdm(range(length_range)):
        # remove the next batch of nodes, chosen by degree rank, from the network
        graph.remove_nodes_from(degrees_reshaped[iterator])
        after_removals.append(
            np.mean(list(nx_cluster.clustering(graph).values())))
    return after_removals
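Hypothetical usage (assumes networkx as nx, numpy as np, tqdm, and networkx.algorithms.cluster as nx_cluster are imported); first_tuple_element() is not defined in the snippet, but from the reshape that follows it presumably extracts the node ids from the sorted (node, degree) pairs, e.g.:

def first_tuple_element(pairs):
    return [node for node, _ in pairs]

G = nx.erdos_renyi_graph(100, 0.05, seed=0)
curve = attack_degree(G, G.number_of_nodes(), 10)  # mean clustering after each batch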
Example #5
    def clustering_coefficient(self, use_undirected=False):
        if use_undirected:
            g = self.g.to_undirected()
        else:
            g = self.g

        if not self.department_clusters:
            self.extract_departments()

        res = []
        for department, cluster in self.department_clusters.items():
            cfs = clustering(g, cluster)
            res.append((department, sum(cfs.values()) / len(cfs)))

        res = sorted(res, key=lambda x: x[1], reverse=True)[:self.top_k]
        return res
Example #6
    def __graph_props(self):
        if self.__data_dict["Connected"]:
            self.__data_dict['Diameter'] = nx.diameter(self.__g)
            self.__data_dict['Radius'] = nx.radius(self.__g)
            self.__data_dict["Centered nodes"] = list(nx.center(self.__g))
        else:
            self.__data_dict['Diameter'] = None
            self.__data_dict['Radius'] = None
            self.__data_dict["Centered nodes"] = []

        if self.__data_dict["Type"] == "Undirected":
            self.__data_dict['Average Clustering'] = nx.average_clustering(
                self.__g)
            self.__clustering_coefficients = cluster.clustering(self.__g)
        else:
            self.__data_dict['Average Clustering'] = None
Example #7
def random_attack(graph, nodes_number, iteration_removals):
    # clustering values of the network after each round of node removals
    after_removals = []
    # number_nodes_largest_cc = []  # number of nodes in the largest component
    length_range = int((nodes_number - iteration_removals)/iteration_removals)
    for _ in tqdm(range(length_range)):
        # the set of nodes forming the largest connected component of the network
        largest_cc = max(networkx.connected_components(graph), key=len)
        # if the largest connected component is smaller than the number of nodes
        # to remove, exit the loop
        if len(largest_cc) < iteration_removals:
            break
        # number_nodes_largest_cc.append(len(largest_cc))
        # remove iteration_removals random nodes from the largest connected component
        graph.remove_nodes_from(random.sample(list(largest_cc), iteration_removals))
        after_removals.append(
            np.mean(list(nx_cluster.clustering(graph).values())))
    return after_removals
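A sketch comparing the two attack strategies on copies of the same network (same import assumptions as above, plus the random module):

G = networkx.barabasi_albert_graph(1000, 3, seed=42)
random_curve = random_attack(G.copy(), G.number_of_nodes(), 50)
degree_curve = attack_degree(G.copy(), G.number_of_nodes(), 50)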
Example #8
def nx_average_clustering_per_k(g):
    # coefficients[k] accumulates [node count, summed clustering] for degree k
    coefficients = [[0, 0] for _ in range(g.number_of_nodes())]
    clustering_coefficient = clustering(g)  # dict of (vertex, cc)
    all_degrees = g.degree()  # view of (vertex, degree) pairs
    for vertex, degree in all_degrees:
        coefficients[degree][0] += 1
        coefficients[degree][1] += clustering_coefficient[vertex]
    # average cc per degree; 0 where no node has that degree
    ck = []
    for count, total in coefficients:
        ck.append(total / count if count else 0)
    return ck
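Usage sketch: plotting the degree-dependent clustering C(k) on a hypothetical graph (degrees that never occur are reported as 0 and disappear on the log scale):

import matplotlib.pyplot as plt
import networkx as nx

g = nx.barabasi_albert_graph(2000, 4, seed=7)
ck = nx_average_clustering_per_k(g)
plt.loglog(range(1, len(ck)), ck[1:], '.')
plt.xlabel('k')
plt.ylabel('C(k)')
plt.show()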
Example #9
def clustering_coefficient(g,
                           department_clusters=None,
                           use_undirected=False,
                           k=20):
    if use_undirected:
        g = g.to_undirected()

    if not department_clusters:
        department_clusters = extract_departments(g)

    res = []
    for department, cluster in department_clusters.items():
        cfs = clustering(g, cluster)
        res.append((department, sum(cfs.values()) / len(cfs)))

    res = sorted(res, key=lambda x: x[1], reverse=True)[:k]
    return res
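Hypothetical call; extract_departments() is not defined in this snippet, but passing department_clusters explicitly sidesteps it:

import networkx as nx

g = nx.karate_club_graph()
clusters = {'a': [0, 1, 2, 3], 'b': [30, 31, 32, 33]}
top = clustering_coefficient(g, department_clusters=clusters, k=2)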
Example #10
def graph_stats(G):
    """
    Compute all the graph-related statistics in the features.

    Note that since the graph is always fully connected, all of these are the
    weighted versions. For this reason, many of these functions use the
    implementations in bctpy rather than NetworkX.
    """
    # Local measures
    clustering_dict = clustering(G, weight='weight')
    adjacency = np.array(adjacency_matrix(G).todense())
    betweenness_centrality_dict = betweenness_centrality(G, weight='weight')
    paths = shortest_path_length(G, weight='weight')
    eccentricities = [max(dists.values()) for (source, dists) in sorted(paths)]
    local_measures = np.concatenate(
        [[v for (k, v) in sorted(clustering_dict.items())],
         [v for (k, v) in sorted(betweenness_centrality_dict.items())],
         eccentricities])
    graph_diameter = max(eccentricities)
    graph_radius = min(eccentricities)
    aspl = average_shortest_path_length(G, weight='weight')
    global_measures = np.array([graph_diameter, graph_radius, aspl])
    return np.concatenate([local_measures, global_measures])
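A minimal sketch of the expected input, per the docstring (a fully connected weighted graph; the weights here are arbitrary, and the snippet's imports of clustering, adjacency_matrix, betweenness_centrality, shortest_path_length, average_shortest_path_length, and numpy as np are assumed):

import random
import networkx as nx

G = nx.complete_graph(10)
for u, v in G.edges:
    G[u][v]['weight'] = random.random()
features = graph_stats(G)  # per-node local measures, then diameter, radius, ASPL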
Example #11
    def getNodeClusteringCoefficient(
            self,
            node: V) -> Union[Any, int, float, dict[Any, Union[Any, int]]]:
        """
        Return the local clustering coefficient of ``node`` in ``self.graph``.
        """
        return clustering(self.graph, node)
Example #12
            G.add_edge(edge, node_id)

pG = projected_graph(G, people)

distances_to_furthest_nodes = dict()
# dict() also accepts the (source, paths) iterator returned by newer NetworkX
for source, targets_to_paths in dict(shortest_path(pG)).items():
    longest_shortest_path_length = 0
    for target, path in targets_to_paths.items():
        longest_shortest_path_length = max(
            [longest_shortest_path_length,
             len(path) - 1])  # path length in edges, not nodes
        distances_to_furthest_nodes[source] = longest_shortest_path_length

nodes['distanceToFurthestNode'] = pandas.Series(distances_to_furthest_nodes,
                                                index=nodes.index)
nodes['clusteringCoefficient'] = pandas.Series(clustering(pG),
                                               index=nodes.index)
nodes['betweennessCentrality'] = pandas.Series(betweenness_centrality(pG),
                                               index=nodes.index)
nodes['degree'] = pandas.Series(dict(pG.degree()), index=nodes.index)

print("vertices: {}".format(pG.number_of_nodes()))
print("edges: {}".format(pG.number_of_edges()))
print(nodes.nlargest(
    5, 'clusteringCoefficient')[['clusteringCoefficient', 'name']])
print(nodes.nlargest(
    5, 'betweennessCentrality')[['betweennessCentrality', 'name']])
print(nodes.nlargest(
    5, 'distanceToFurthestNode')[['distanceToFurthestNode', 'name']])
print(nodes.nlargest(5, 'degree')[['degree', 'name']])
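The fragment above assumes a bipartite graph G of people and the items they co-occur with; a minimal hypothetical setup for the projection step would be:

import networkx as nx
from networkx.algorithms.bipartite import projected_graph

B = nx.Graph()
people = ['p1', 'p2', 'p3']
events = ['e1', 'e2']
B.add_nodes_from(people, bipartite=0)
B.add_nodes_from(events, bipartite=1)
B.add_edges_from([('p1', 'e1'), ('p2', 'e1'), ('p2', 'e2'), ('p3', 'e2')])
pG = projected_graph(B, people)  # people connected via shared events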
Example #13
def ver_medidas(G):
    print(function.info(G))
    """
    Numero minimo de nodos que deben ser removidos para desconectar G
    """
    print("Numero minimo de nodos que deben ser removidos para desconectar G :"+str(approximation.node_connectivity(G)))

    """
    average clustering coefficient of G.
    """
    print("average clustering coefficient of G: "+str(approximation.average_clustering(G)))

    """
    Densidad de un Grafo
    """
    print("Densidad de G: "+str(function.density(G)))

    """
    Assortativity measures the similarity of connections in
    the graph with respect to the node degree.
    Positive values of r indicate a correlation between nodes
    of similar degree, while a negative value indicates
    correlations between nodes of different degree.
    """

    print("degree assortativity:"+str(assortativity.degree_assortativity_coefficient(G)))

    """
    Assortativity measures the similarity of connections
    in the graph with respect to the given attribute.
    """

    print("assortativity for node attributes: "+str(assortativity.attribute_assortativity_coefficient(G,"crime")))

    """
    Grado promedio vecindad
    """
    plt.plot(assortativity.average_neighbor_degree(G).values())
    plt.title("Grado promedio vecindad")
    plt.xlabel("Nodo")
    plt.ylabel("Grado")
    plt.show();

    """
    Grado de Centralidad de cada nodo
    """

    plt.plot(centrality.degree_centrality(G).values())
    plt.title("Grado de centralidad")
    plt.xlabel("Nodo")
    plt.ylabel("Centralidad")
    plt.show();


    """
    Calcular el coeficiente de agrupamiento para nodos
    """

    plt.plot(cluster.clustering(G).values())
    plt.title("coeficiente de agrupamiento")
    plt.xlabel("Nodo")
    plt.show();

    """
    Media coeficiente de Agrupamiento
    """
    print("Coeficiente de agrupamiento de G:"+str(cluster.average_clustering(G)))

    """
    Centro del grafo
    El centro de un grafo G es el subgrafo inducido por el 
    conjunto de vertices de excentricidad minima.

     La  excentricidad  de  v  in  V  se  define  como  la
     distancia maxima desde v a cualquier otro vertice del 
     grafo G siguiendo caminos de longitud minima.
    """

    print("Centro de G:"+ str(distance_measures.center(G)))

    """
    Diametro de un grafo
    The diameter is the maximum eccentricity.
    """
    print("Diametro de G:"+str(distance_measures.diameter(G)))


    """
    Excentricidad de cada Nodo
    The eccentricity of a node v is the maximum distance
    from v to all other nodes in G.
    """
    plt.plot(distance_measures.eccentricity(G).values())
    plt.title("Excentricidad de cada Nodo")
    plt.xlabel("Nodo")
    plt.show();

    """
    Periferia 
    The periphery is the set of nodes with eccentricity equal to the diameter.
    """
    print("Periferia de G:")
    print(distance_measures.periphery(G))

    """
    Radio
    The radius is the minimum eccentricity.

    """

    print("Radio de G:"+str(distance_measures.radius(G)))

    """
    PageRank calcula una clasificacion de los nodos
    en el grafico G en funcion de la estructura de 
    los enlaces entrantes. Originalmente fue disenado
    como un algoritmo para clasificar paginas web.
    """

    plt.plot(link_analysis.pagerank_alg.pagerank(G).values())
    plt.title("Puntaje de cada Nodo")
    plt.xlabel("Nodo")
    plt.show();

    """
    Coeficiente de Small World.
    A graph is commonly classified as small-world if sigma>1.

    """

    print("Coeficiente de Small World: " + str(smallworld.sigma(G)))

    """
    The small-world coefficient (omega) ranges between -1 and 1.
    Values close to 0 means the G features small-world characteristics.
    Values close to -1 means G has a lattice shape whereas values close
    to 1 means G is a random graph.
    """
    print("Omega coeficiente: "+str(smallworld.omega(G)))
Example #14
    y = np.array(y) + 1
    x = np.log(x)
    y = np.log(y)
    plt.scatter(x, y, s=1, color=(1, 0, 0))
    plt.show()


net_file = '../net_6'

# Read graph from file
G = read_edgelist(net_file)
print("The net_6 has %d nodes and %d edges" %
      (G.number_of_nodes(), G.number_of_edges()))

# Clustering coefficient 0.07872
C = list(clustering(G).values())
C = sum(C) / len(C)
print("The net_6 has clustering coefficient: %f" % C)

scatterplot_degree_distribution(G)
log_scatterplot_degree_distribution(G)
d = parallel_diam(G, 5)
print("The net_6 has diameter: ", d)

###############################
## net 6 is random?
###############################
RG = randomG(10000, 0.0016)
print("The random graph has %d nodes and %d edges" %
      (RG.number_of_nodes(), RG.number_of_edges()))
# Clustering coefficient 0.07872
# deg_dict = OrderedDict(sorted(graph.degree(), key=lambda x: x[0]))
# deg_dict2 = OrderedDict(sorted(graph2.degree(), key=lambda x: x[0]))
# deg_dict3 = OrderedDict(sorted(graph3.degree(), key=lambda x: x[0]))
# deg_dict_avg = [(list(deg_dict.values())[x] + list(deg_dict2.values())[x] + list(deg_dict3.values())[x])/3 for x in range(len(deg_dict.values()))]
#
# # Assortativity Plot
# plt.scatter(deg_dict_avg, neighbor_deg_avg, marker=".")
# plt.xlabel('Degree')
# plt.ylabel('Average neighbour degree')
# plt.show()
# # Pearson Correlation of Plot:
# print(numpy.corrcoef(list(deg_dict_avg), list(neighbor_deg_avg))[0, 1])


# Clustering Data
data = OrderedDict(sorted((cluster.clustering(graph)).items()))
data2 = OrderedDict(sorted((cluster.clustering(graph2)).items()))
data3 = OrderedDict(sorted((cluster.clustering(graph3)).items()))

# Clustering Calculation
values = [(list(data.values())[x] + list(data2.values())[x] +
           list(data3.values())[x])/3 for x in range(len(data.values()))]
values.sort()
keys = data.keys()

# Clustering Plot
plt.scatter(values, range(len(keys)), marker=".")
plt.ylabel('Cumulative frequency of Nodes')
plt.xlabel('Local Clustering Coefficient Value of Nodes')
# Twitter Annotation:
plt.annotate('Median value at 0.4 and 500', xytext=(0.45, 500), xy=(0.402, 500), arrowprops = {'facecolor':'red'})
# # Google+ Annotation:
Example #16
    def avgClustering(self, W):
        from networkx.algorithms.cluster import clustering
        g = buildGraph(W, weighted=self.weighted)
        cl = clustering(g, weight='weight' if self.weighted else None)
        return centerMeasure(cl.values(), self.center)
Example #17
def find_topics_topicness(g: nx.DiGraph, visualize=False) -> np.ndarray:
    """
    Computes a keyword-topic matrix containing memebership information of keywords w.r.t topics.

    
    Extended SUmmuray
    -----------------
    The computation of this matrix uses an iterative algorithm, which picks best nodes using a 'topicness'
    metric, derived from pagerank, local cluster coefficient, and betweenness centrality.

    Parameters
    ----------
    g : nx.DiGraph
        keyword co-occurrence graph

    Returns
    -------
    ndarray
        keyword-topic matrix, containing the information about keywords and topics. 
        specifically for the entry [ui,ti] if contains 1 if the topic is in 
    """

    n = g.number_of_nodes()
    m = g.number_of_edges()

    # get betweenness centrality
    betweenness = misc.dictionary_to_numpy(
        nx.betweenness_centrality(g.to_undirected(as_view=True)))

    # get personalized-pagerank using cluster coefficient
    c = cluster.clustering(g, weight="weight")
    z = sum(c.values())
    if z != 0:
        v = {ui: c[ui] / z for ui in range(n)}
        pagerank = misc.dictionary_to_numpy(
            nx.pagerank_numpy(g, personalization=v))
    else:
        pagerank = misc.dictionary_to_numpy(nx.pagerank_numpy(g))

    # compute topicness --------------------------
    pagerank: np.ndarray = pagerank / pagerank.max()  # normalize between [0,1]
    if betweenness.max() != 0:
        betweenness = betweenness / betweenness.max()  # normalize between [0,1]
    topicness: np.ndarray = pagerank * (1 - betweenness)

    # normalize topicness between [0,1]
    topicness = topicness - topicness.min()
    topicness = topicness / (topicness.max())

    # normalize the edge weights
    influence.normalize_weights(g, mode="mixed")

    # compute area of influence for each node
    node_influence = np.zeros([n, n], dtype=float)
    for u in tqdm.trange(n):
        node_influence[u, :] = influence.linear_threshold_mean(g, {u}, 15)

    # estimate topics
    topics_list, surface = list(), np.array(topicness)
    #for i in tqdm.trange(k):
    while (surface > 0).any():
        # compute topic influence
        fuzzy_topic = node_influence[surface.argmax(), :]
        crisp_topic = defuzzification(fuzzy_topic)

        # add topic to extracted topics
        topics_list.append(crisp_topic)

        # update surface
        surface = surface * (1 - fuzzy_topic)

    # fill node-topic matrix
    topic_number = len(topics_list)
    T = np.zeros([n, topic_number], dtype=float)  # node-topic matrix
    for i in range(topic_number):
        T[:, i] = topics_list[i]

    # visualize various information about the estimated topics
    if visualize:
        coverage = T.sum(axis=1)
        sources = np.zeros([n])
        sources[T.argmax(axis=0)] = 1

        visualization.plot_edge_weights(g)
        visualization.show_graphfunction(g, topicness, with_labels=False)
        visualization.show_graphfunction(g, sources, with_labels=False)
        visualization.show_graphfunction(g, coverage, with_labels=False)
    return T
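The 'topicness' score at the heart of the function can be sketched standalone on a toy digraph (plain nx.pagerank stands in for the personalized variant used above):

import networkx as nx
import numpy as np

g = nx.gnp_random_graph(30, 0.15, seed=3, directed=True)
pr = np.array(list(nx.pagerank(g).values()))
bt = np.array(list(nx.betweenness_centrality(g.to_undirected(as_view=True)).values()))
pr = pr / pr.max()
if bt.max() != 0:
    bt = bt / bt.max()
topicness = pr * (1 - bt)  # high pagerank, low betweenness: likely topic core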
Example #18
# coding: utf-8
import networkx
import igraph
graph_nx = networkx.waxman_graph(20)
graph_ig = igraph.Graph(len(graph_nx), list(zip(*list(zip(*networkx.to_edgelist(graph_nx)))[:2])))
import numpy as np
import networkx.algorithms.cluster as nx_cluster
cluster_nx = np.mean(list(nx_cluster.clustering(graph_nx).values()))
cluster_nx
nx_cluster.clustering(graph_nx).values()
graph_nx = networkx.waxman_graph(100)  
nx_cluster.clustering(graph_nx).values()
cluster_nx = np.mean(list(nx_cluster.clustering(graph_nx).values()))
cluster_nx
graph_ig = igraph.Graph(len(graph_nx), list(zip(*list(zip(*networkx.to_edgelist(graph_nx)))[:2])))
graph_ig.transitivity_avglocal_undirected()
graph_ig.transitivity_undirected()
graph_ig.transitivity_local_undirected()
cluster_ig = np.mean(graph_ig.transitivity_local_undirected())
cluster_ig
np.mean(graph_ig.transitivity_local_undirected())
graph_ig.transitivity_local_undirected()
cluster_ig = graph_ig.transitivity_local_undirected()
cluster_ig
np.nan_to_num(cluster_ig)
cluster_ig
cluster_ig = np.nan_to_num(cluster_ig)
cluster_ig
np.mean(cluster_ig)
nx_cluster.transitivity(graph_nx)
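The takeaway from the session: NetworkX reports 0 for nodes with degree < 2, while igraph returns NaN for them, so the two averages only agree after np.nan_to_num; a hedged check against the session's variables:

assert abs(cluster_nx - np.mean(np.nan_to_num(graph_ig.transitivity_local_undirected()))) < 1e-9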