Ejemplo n.º 1
0
    def community(self, resolution=1):
        """Run Louvain community detection on ``G``, report it and plot it.

        Note that ``G`` is not a parameter and is not read from ``self``;
        it is resolved from the enclosing (module) scope, exactly as in the
        original code.

        :param resolution: forwarded to ``community_louvain.best_partition``.
        :return: dict mapping each node to its community label.
        """
        partition = community_louvain.best_partition(G, resolution=resolution)

        # Group the nodes by label in a single pass instead of rescanning the
        # whole partition once per community.
        members = {}
        for node, label in partition.items():
            members.setdefault(label, []).append(node)

        # Count distinct labels. The labels start at 0, so max(label) would
        # under-report the number of communities by one.
        n_community = len(members)

        print('Modularity:', community_louvain.modularity(partition, G))
        print('Number of communities:', n_community)

        # drawing
        pos = nx.spring_layout(G)
        plt.figure(figsize=(12, 8))
        for list_nodes in members.values():
            # One random RGB tuple per community, wrapped in a list so that it
            # is read as a single colour; a flat [r, g, b] list is ambiguous
            # and is treated as three scalar values for 3-node communities.
            colour = (random(), random(), random())
            nx.draw_networkx_nodes(G,
                                   pos,
                                   nodelist=list_nodes,
                                   node_size=20,
                                   node_color=[colour])

        nx.draw_networkx_edges(G, pos, alpha=0.5, width=0.5)
        plt.show()

        return partition
def clustering(G):
    """
    Run Louvain clustering on a graph and print a short summary.

    Prints the number of clusters found and the modularity of the partition.

    :param G: networkx graph
    :return: None
    """
    partition = community.best_partition(G)
    print()
    print("Clusters")
    # Count the distinct labels: they start at 0, so max(label) is one less
    # than the real number of clusters.
    num_clusts = len(set(partition.values()))
    print("Num clusters = " + str(num_clusts))
    modValue = community.modularity(partition, G)
    print("modularity: {}".format(modValue))
Ejemplo n.º 3
0
def create_community_graph(nodes, edges, filename, scale):
    """Build a graph, colour it by Louvain community, save and show the plot.

    :param nodes: iterable of node identifiers.
    :param edges: table indexed by column name; the columns ``'Source'``,
        ``'Target'``, ``'Label'`` (accessed through ``.values``) and
        ``'Weight'`` are read.
    :param filename: path handed to ``plt.savefig``.
    :param scale: multiplier applied to every edge weight to get line width.
    :return: the constructed ``nx.Graph``.
    """
    import community.community_louvain as community

    plt.figure(figsize=(20, 20))

    G = nx.Graph()

    for node in nodes:
        G.add_node(node)

    # Materialise the pairs. On Python 3 zip() is a one-shot iterator, so
    # add_edges_from() would consume it and the label loop below would never
    # run.
    edge_list = list(zip(edges['Source'], edges['Target']))

    G.add_edges_from(edge_list)

    for (a, b), val in zip(edge_list, edges['Label'].values):
        G[a][b]['label'] = val

    pos = nx.circular_layout(G, scale=50)

    # use one of the edge properties to control line thickness
    # NOTE(review): widths are taken in table-row order while networkx draws
    # in G.edges() order; these can differ (e.g. duplicate rows) - confirm.
    edgewidth = edges['Weight']

    parts = community.best_partition(G)
    node_color = [parts.get(node) for node in G.nodes()]

    print("Louvain Modularity: ", community.modularity(parts, G))

    nx.draw_networkx_nodes(G, pos, cmap=plt.get_cmap("rainbow"),
                           node_size=[float(G.degree(v)) * 700 for v in G],
                           alpha=0.7, node_color=node_color, linewidths=0)
    nx.draw_networkx_edges(G, pos, alpha=0.25, edge_color='#0EA6EC',
                           width=[w * scale for w in edgewidth], arrows=False)

    node_labels = {i: '{}'.format(i) for i in G.nodes()}
    nx.draw_networkx_labels(G, pos, labels=node_labels,
                            font_color='#f5f5f5', font_weight='bold')

    axes = plt.gca()
    # set_axis_bgcolor() was removed from matplotlib; set_facecolor() is the
    # replacement.
    axes.set_facecolor('#f5f5f5')
    axes.get_xaxis().set_visible(False)
    axes.get_yaxis().set_visible(False)
    plt.savefig(filename, dpi=150)
    # The original referenced plt.show without calling it, so nothing was
    # ever displayed.
    plt.show()

    return G
Ejemplo n.º 4
0
    def graph_partition(self,G,ret_df):
        """Partition ``G`` with Louvain and score it against the true labels.

        Side effect: ``self.labels_true`` is overwritten with one label index
        per row of ``ret_df``.

        :param G: graph handed to ``community_louvain.best_partition``. Its
            node ids are used directly as indices into an array with one
            entry per row of ``ret_df``.
        :param ret_df: table with ``'KEY_WORDS'`` and ``'TYPE'`` columns;
            ``'TYPE'`` values are ``-``-separated strings.
        :return: ``(comm_dict, scores)`` where ``comm_dict`` maps community id
            to the list of its node ids and ``scores`` is whatever
            ``self.Eva_Metric`` returns.
        """
        m = ret_df.shape[0]
        keywords = ret_df['KEY_WORDS']
        # best_partition returns a dict {node id: community id}. resolution
        # changes the size of the communities; 2 was the original author's
        # choice. Original author's note: this is slow when the graph has
        # many edges.
        partition = community_louvain.best_partition(G,resolution = 2)

        # {community id: [node ids]}
        comm_dict = defaultdict(list)
        for doc, com_id in partition.items():
            comm_dict[com_id].append(doc)

        # Ground-truth label index for every row.
        self.labels_true = np.empty(shape=[m])
        for i,d_type in enumerate(ret_df['TYPE']):
            key_list = d_type.split("-")
            # More than three segments: use the third one; otherwise the last.
            key = key_list[2] if len(key_list) > 3 else key_list[-1]
            self.labels_true[i] = self.labels_dict[key]

        # Every member of a community is predicted to have that community's
        # id. Rows that belong to no community get the sentinel -1 instead of
        # uninitialised memory.
        # NOTE(review): the original author observed that rand-index / mutual
        # information style metrics only need consistent grouping, not
        # matching label names - confirm against Eva_Metric.
        labels_pred = np.full(m, -1.0)
        for com_id,com_members in comm_dict.items():
            labels_pred[com_members] = com_id

        scores = self.Eva_Metric(self.labels_true,labels_pred)
        print(self.getRealDocs(list(keywords)))
        return comm_dict,scores
Ejemplo n.º 5
0
    def find_best_partition(self):
        """Split ``self.graph`` by repeatedly removing max-betweenness edges.

        Each round removes the edges returned by
        ``self.get_max_betweenness_edges``, labels the resulting connected
        components and scores that labelling with Louvain modularity. The
        loop stops after 40 rounds, when every remaining edge is a
        max-betweenness edge, or when modularity drops (the last removal is
        then undone).

        :return: ``(partition, G, removed_edges)``: the best node-to-component
            mapping found, the pruned working copy of the graph, and the list
            of edges removed to obtain it.
        """
        from community import community_louvain
        G = self.graph.copy()
        modularity = -float('inf')
        removed_edges = []
        partition = {}
        cou = 0
        while cou < 40:
            cou += 1
            # 1. edge betweenness of the current working graph
            betweenness = self.calculte_betweenness(G)
            # 2. the set of edges with maximal betweenness
            max_betweenness_edges = self.get_max_betweenness_edges(
                betweenness)
            # All remaining edges tie for the maximum: nothing left to split.
            if len(G.edges()) == len(max_betweenness_edges):
                break

            G.remove_edges_from(max_betweenness_edges)

            # Label every node with the index of its connected component.
            tmp_partition = {}
            for idx, component in enumerate(nx.connected_components(G)):
                for inner in component:
                    tmp_partition[inner] = idx
                print('IDX=', idx + 1)

            # Score against the ORIGINAL graph. On the pruned copy every edge
            # is intra-component, so the value grows with each split and the
            # stop test below would almost never trigger.
            cur_mod = community_louvain.modularity(tmp_partition,
                                                   self.graph)
            print("CUR MOD=", cur_mod, 'while modularity=', modularity)
            if cur_mod < modularity:
                # This split made things worse: undo it and stop.
                G.add_edges_from(max_betweenness_edges)
                break

            partition = tmp_partition
            removed_edges.extend(max_betweenness_edges)
            modularity = cur_mod
            self.display(partition)
        print('COUNT', cou)
        return partition, G, removed_edges
Ejemplo n.º 6
0
            file.write("Freeman_centralization\tnan \n")

        ### Mean closeness centrality ###
        closeness_cent = nx.closeness_centrality(G)
        num_holder = []
        for key, value in closeness_cent.items():
            num_holder.append(value)
        mean_closeness_cent = mean(num_holder)
        file.write("Mean_closeness_centrality\t" +
                   str(round(mean_closeness_cent, 5)) + "\n")

        ### Modularity ###
        p = community_louvain.best_partition(G,
                                             random_state=1,
                                             randomize=False)
        Q = community_louvain.modularity(p, G)
        print("Modularity of best partition of graph: ", str(Q))
        file.write("Modularity\t" + str(round(Q, 5)) + "\n")

        ### Median comp size over total number of nodes ###
        med_list = []
        for g in nx.connected_component_subgraphs(G):
            med_list.append(nx.number_of_nodes(g))
        med_over_nnodes = median(med_list) / nnodes
        file.write("Median_comp_size_over_#_nodes\t" +
                   str(round(med_over_nnodes, 5)) + "\n")

        ### Degree assortativity ###
        degree_assortativity = nx.degree_assortativity_coefficient(G)
        file.write("Degree_assortativity\t" +
                   str(round(degree_assortativity, 5)) + "\n")
Ejemplo n.º 7
0
def community_detector(algorithm_name,network,most_valualble_edge=None):
    """Detect communities in ``network`` with the named algorithm.

    :param algorithm_name: one of ``'louvain'``, ``'girvin_newman'`` or
        ``'clique_percolation'``; any other value yields ``None``.
    :param network: graph to analyse.
    :param most_valualble_edge: forwarded to networkx ``girvan_newman``.
    :return: dict with keys ``'num_partitions'``, ``'modularity'`` and
        ``'partition'`` (list of node lists, or ``None`` when no candidate
        beat a modularity of 0).
    """
    weight = 'weight' if Helpers().is_weighted(network) else None

    if algorithm_name == 'louvain':
        node_to_label = community_louvain.best_partition(network,
                                                         weight=weight)
        num_partitions = len(set(node_to_label.values()))
        modularity = community_louvain.modularity(node_to_label, network)
        # Pre-create the buckets so the output is ordered by label 0..n-1.
        members = {label: [] for label in range(num_partitions)}
        for node, label in node_to_label.items():
            members[label].append(node)

        return {'num_partitions': num_partitions,
                'modularity': modularity,
                'partition': list(members.values())}

    if algorithm_name == 'girvin_newman':
        levels = nx.algorithms.community.centrality.girvan_newman(
            network, most_valualble_edge)
        best_mod = 0
        best_count = 0
        best_groups = None
        losing_streak = []
        for level in levels:
            groups = {index: list(nodes) for index, nodes in enumerate(level)}
            level_mod = Helpers().modularity(network, groups)
            if level_mod > best_mod:
                best_mod = level_mod
                best_count = len(groups)
                best_groups = list(groups.values())
                losing_streak = []
            else:
                losing_streak.append(level_mod)
                # Give up once more than 100 consecutive levels fail to
                # improve; checked before the next level is generated.
                if len(losing_streak) > 100:
                    break
        return {'num_partitions': best_count,
                'modularity': best_mod,
                'partition': best_groups}

    if algorithm_name == 'clique_percolation':
        best_mod = 0
        best_count = 0
        best_groups = None
        for k in range(2, len(network.nodes())):
            try:
                found = Helpers().unpack_gen(
                    nx.algorithms.community.k_clique_communities(network, k))
                k_mod = Helpers().modularity(network, found.copy())
                if k_mod > best_mod:
                    best_mod = k_mod
                    best_groups_now = list(found.values())
                    best_count = len(best_groups_now)
                    best_groups = list(found.values())
            except IndexError:
                # An IndexError ends the search over k.
                break
        return {'num_partitions': best_count,
                'modularity': best_mod,
                'partition': best_groups}
Ejemplo n.º 8
0
# nx.draw(G, pos=nx.circular_layout(G), node_color='r', edge_color='b')
# plt.show()
print("getting the graph end")

# Compute the best partition
print("partitioning start")

partition = community_louvain.best_partition(G)

print("partitioning end")

# Store the partition dictionary to file, one "node_id,community_id" per line
print("Store the partition dictionary to file start ")

# The context manager closes the file even if a write fails (e.g. a node id
# that '%d' cannot format).
with open('partition.txt', 'w') as outFile:
    for nid in partition:
        community = partition[nid]
        outFile.write('%d,%d\n' % (nid, community))

print("Store the partition dictionary to file end")

# Number of communities
# 16172
print(len(set(partition.values())))

# Modularity of the partition
# 0.9377583051376279
print(community_louvain.modularity(partition, G))
def NetworkAnalysis(G, filename='highRatingResults.txt'):
    """Compute a set of network metrics for ``G`` and write them to a file.

    The report has one line per metric, in this order: node count, edge
    count, connected-component count, betweenness centrality, degree
    centrality, clustering coefficients, Louvain modularity, diameter,
    density, triangles, average clustering, average degree centrality and
    average betweenness centrality.

    The file is opened in a ``with`` block so that it is closed even when a
    metric raises part-way through; any such exception still propagates.

    :param G: networkx graph to analyse.
    :param filename: path of the text report; overwritten if it exists.
    :return: None
    """
    # standard local metrics
    nbr_nodes = nx.number_of_nodes(G)
    nbr_edges = nx.number_of_edges(G)
    nbr_components = nx.number_connected_components(G)

    with open(filename, 'w') as F:
        F.write("Number of nodes:" + str(nbr_nodes) + "\n")
        F.write("Number of edges:" + str(nbr_edges) + "\n")
        F.write("Number of connected components:" + str(nbr_components) + "\n")

        # betweenness
        betweenList = nx.betweenness_centrality(G)
        F.write("The list of betweenness centrality is" + str(betweenList) + "\n")

        # degree
        degreeCentrality = nx.degree_centrality(G)
        F.write("The degrees of centrality is " + str(degreeCentrality) + "\n")

        # clustering coefficient for each node
        F.write("The clustering coefficients are " + str(nx.clustering(G)) + "\n")

        partition = community_louvain.best_partition(G)
        F.write("The community modularity is " +
                str(community_louvain.modularity(partition, G)) + "\n")

        # global network metrics (describe the whole network)
        # diameter - the max of shortest distances between nodes
        F.write("The diameter is " + str(nx.diameter(G)) + "\n")

        # density
        F.write("The density is " + str(nx.density(G)) + "\n")

        # triangles
        F.write("The triangle is " + str(nx.triangles(G)) + "\n")

        # average clustering coefficient for the graph
        avgclu = nx.average_clustering(G)
        F.write("The average clustering is " + str(avgclu) + "\n")

        # Averages are computed once over all values; the original recomputed
        # them on every loop iteration and left them undefined for empty input.
        avgdeg = np.average(list(degreeCentrality.values()))
        F.write("The average degree centrality is " + str(avgdeg) + "\n")

        avgB = np.average(list(betweenList.values()))
        F.write("The average betweenness centrality is " + str(avgB) + "\n")
flows.add_edges_from(significant_flows)
# Graph.selfloop_edges() was removed from networkx; use the module-level
# function, and materialise it first because edges are removed while the
# result would otherwise still be iterating.
flows.remove_edges_from(list(nx.selfloop_edges(flows)))

#find 5 largest betweenness centralities
largest_betweeness = pd.Series(nx.betweenness_centrality(flows)).nlargest(5)
#find 5 largest closeness centralities
largest_closeness = pd.Series(nx.closeness_centrality(flows)).nlargest(5)

#find number of connected components
# len([generator]) is always 1, so ask networkx for the real count.
num_of_connected_componants = nx.number_connected_components(flows)

#find communities in the network and the modularity
partition = cm.best_partition(flows)

communities = pd.Series(partition)
communities = pd.DataFrame({
    'States': communities.index,
    'Community': communities.values
})

num_of_communities = len(communities['Community'].unique())

communtiy0states = communities[communities['Community'] == 0]['States']
communtiy1states = communities[communities['Community'] == 1]['States']
communtiy2states = communities[communities['Community'] == 2]['States']

modularity = cm.modularity(partition, flows)

#save graph
# Passing the path lets networkx open and close the file itself; the original
# passed an open() handle that was never closed.
nx.write_graphml(flows, 'SignificantStatetateMigrationFlows.graphml')
#
# * It is hard to spot communities in the network map above. Below we find communities among the words and plot them as a graph.
#
# * We use the Louvain algorithm to find the communities. The original author notes it is widely used because it is fast compared with other algorithms.

# In[44]:

# Compute the modularity used for community detection.

from community import community_louvain

# Split the nodes of travel_network into communities (node -> community label).
partition = community_louvain.best_partition(travel_network)

# Use the partition to measure how well separated the nodes are.
modularity = community_louvain.modularity(partition, travel_network)
print('Modularity:', modularity)

# In[45]:

plt.figure(figsize=(25, 20))

# One community label per node, in node order, used as colour values below.
colors = [partition[n] for n in travel_network.nodes()]
my_colors = plt.cm.Set3_r
nx.draw(travel_network,
        with_labels=True,
        labels=travel_id2word,
        font_family='Malgun Gothic',
        node_color=colors,
        cmap=my_colors,
        font_size=14,
Ejemplo n.º 12
0
 def modularity(self):
     """Return the modularity of the Louvain best partition of this object.

     ``self`` is passed directly as the graph to ``community_louvain``.
     """
     louvain_partition = community_louvain.best_partition(self)
     score = community_louvain.modularity(louvain_partition, self)
     return score
Ejemplo n.º 13
0
                           pos,
                           list_nodes,
                           node_size=50,
                           node_color=str(count / size))

    nx.draw_networkx_edges(G, pos, with_labels=True, alpha=0.5)
    plt.show()
    values = [partition.get(node) for node in G.nodes()]
    nx.draw_spring(G,
                   cmap=plt.get_cmap('jet'),
                   node_color=values,
                   node_size=30,
                   with_labels=False)
    plt.show()
    # Clustering evaluation
    print('Modularity: {0:.4f}'.format(community.modularity(partition, G)))

    # if display_plots:
    fx.display_partition(G, partition, 2)

    #Calculate induced graph
    ind = community.induced_graph(partition, G)

# Display induced graph
# NOTE(review): `ind` and `display_plots` are not defined in this span; `ind`
# is presumably the result of community.induced_graph(partition, G) computed
# just above - confirm it is in scope here.
if display_plots:
    # Draw into figure number 3.
    plt.figure(3)
    # Random node positions for the induced graph.
    pos = nx.random_layout(ind)
    nx.draw_networkx_edges(ind, pos, alpha=0.5)
    # node_color='1' is passed as a string, not a number.
    nx.draw_networkx_nodes(ind, pos, node_size=40, node_color='1')
    plt.show()
Ejemplo n.º 14
0
    number_of_community = len(community_list)

    mat_size = np.max(community_list) + 1
    mod = [[0.0] * mat_size for i in range(mat_size)]

    for i in community_list:
        tmp = copy.deepcopy(partition)
        #print(tmp)
        for j in community_list:
            #print(tmp)
            if (i != j):
                for key, values in partition.items():
                    if (values == i):
                        tmp[key] = j
                mod[i][j] = community_louvain.modularity(tmp, dolphins_data)

    mod = np.array(mod)  #Convert to array
    max = 0  #Initial Value to find Max modularity
    max_index = (0, 0)  #to find index of max modularity

    for i in range(len(mod)):
        for j in range(len(mod[0])):
            if (mod[i][j] > max):
                max = mod[i][j]
                max_index = (i, j)

    #Merge Step
    tmp = copy.deepcopy(partition)
    #print(max_index)
    for key, values in tmp.items():
Ejemplo n.º 15
0
# Start from the Louvain partition, then greedily merge the pair of
# communities whose merge gives the highest modularity until two remain.
partition = community_louvain.best_partition(G)
while True:
    community_labels = set(partition.values())
    if len(community_labels) <= 2:
        break

    max_val, max_indx = -1 * float('inf'), (None, None)

    for i in community_labels:
        for j in community_labels:
            if i == j:
                continue
            # Build a fresh candidate for every (i, j) pair. The original
            # copied once per i, so each candidate also carried the merges of
            # all previously tried j and the score was not for this pair alone.
            temp = {
                key: (i if label == j else label)
                for key, label in partition.items()
            }
            val = community_louvain.modularity(temp, G)
            if val > max_val:
                max_val = val
                max_indx = (i, j)

    # Apply the best merge: both communities take the first label of the pair.
    for key in partition:
        if partition[key] in max_indx:
            partition[key] = max_indx[0]

final_labels = tuple(set(partition.values()))

# Guard the indexing: Louvain may return fewer than two communities, in which
# case the missing group is simply empty.
group_1 = ([node for node in partition if partition[node] == final_labels[0]]
           if len(final_labels) > 0 else [])
group_2 = ([node for node in partition if partition[node] == final_labels[1]]
           if len(final_labels) > 1 else [])

# pos = nx.spring_layout(G)
Ejemplo n.º 16
0
def print_modularity(G, partition, title):
    """Print ``<title>`` followed by the modularity of ``partition`` on ``G``.

    :param G: graph the partition refers to.
    :param partition: node-to-community mapping passed to
        ``community_louvain.modularity``.
    :param title: string printed between angle brackets as a header.
    :return: None
    """
    score = community_louvain.modularity(partition, G)
    header = '<' + title + '>'
    print(header)
    print("modularity:", score)