Example #1
def sub_clusterization_experiment(G1):
    p1 = partition(G1)
    print("Modularity: " + str(community.modularity(p1, G1)))

    # Split the largest cluster twice, reporting modularity after each split.
    sizes = clustersSizes(p1)
    ordered = sorted(range(len(sizes)), key=lambda k: sizes[k], reverse=True)
    p1 = splitCluster(G1, p1, ordered[0])
    print("Modularity: " + str(community.modularity(p1, G1)))
    sizes = clustersSizes(p1)
    ordered = sorted(range(len(sizes)), key=lambda k: sizes[k], reverse=True)
    p1 = splitCluster(G1, p1, ordered[0])
    print("Modularity: " + str(community.modularity(p1, G1)))

    # Interactively list the members of the cluster containing a given word.
    # An empty input ends the loop so the plot below stays reachable.
    while True:
        word = input("word: ")
        if not word:
            break
        s = G1.nbunch_iter([i[0] for i in p1.items() if i[1] == p1[findNode(G1, word)]])
        try:
            for i in s:
                print(G1.nodes[i]["text"] + ", ", end="")
            print()
        except KeyError:
            pass

    plt.plot(range(1, len(clustersSizes(p1)) + 1), sorted(clustersSizes(p1), reverse=True))
    plt.xlabel("Cluster no.")
    plt.ylabel("Cluster size")
    # plt.title("Number of nodes per cluster (sub-clustering C1)")
    plt.title("Number of nodes per cluster (sub-clustering C1 and C2)")
    plt.show()
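The snippet above relies on project helpers (partition, clustersSizes, splitCluster, findNode) that are not shown. A minimal, hypothetical sketch of what a splitCluster helper might look like, assuming python-louvain conventions (a partition is a node -> community-id dict):

# Hypothetical sketch of a splitCluster helper: re-run Louvain on the subgraph
# induced by one community and relabel its nodes with fresh community ids.
import community  # python-louvain

def splitCluster(G, partition, cluster_id):
    members = [n for n, c in partition.items() if c == cluster_id]
    sub_part = community.best_partition(G.subgraph(members))
    next_id = max(partition.values()) + 1
    new_partition = dict(partition)
    for node, sub_c in sub_part.items():
        # keep sub-community 0 under the old id, move the rest to new ids
        new_partition[node] = cluster_id if sub_c == 0 else next_id + sub_c - 1
    return new_partition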
Example #2
 def __InitClusterAnalysis(self):

     print("starting best m_partition algorithm (will take a while)....")
     if self.m_classifier is None:
         classifier = classifier_c.classifier_c(self.m_graph, "best_practice")
         self.m_partition = classifier.run_classifier(classifier_c.classifier_type_e.e_bestPractice)  # community.best_partition(self.m_graph)
         modularity = community.modularity(self.m_partition, self.m_graph)
         self.LogPrint("the modularity is %f" % modularity)
     else:
         self.m_partition = self.m_classifier.classifey()
     if self.m_partition is None:
         self.LogPrint("partition is NULL...will not create cluster, exit")
         return
     else:
         # Tally community sizes and collect the members of each community.
         for node in self.m_partition.items():
             if node[1] in self.m_comSize:
                 self.m_comSize[node[1]] = self.m_comSize[node[1]] + 1
                 self.m_comMem[node[1]].append(node[0])
             else:
                 self.m_comSize[node[1]] = 1
                 self.m_comMem[node[1]] = [node[0]]
     for cSize in self.m_comSize.items():
         if cSize[1] > 1:
             self.LogPrint("cSize[1]=%d" % cSize[1])
             self.m_comsizeClean[cSize[0]] = cSize[1]
             if len(self.m_comMem[cSize[0]]) == 1:
                 self.LogPrint("this community has only one member...")
             self.m_comMemClean[cSize[0]] = self.m_comMem[cSize[0]]

     for memberIDs in self.m_comMemClean.items():
         self.m_comMemNames[memberIDs[0]] = []
         for member in memberIDs[1]:
             self.m_comMemNames[memberIDs[0]].append(utils.GetNodeName(member, self.m_graph))
Example #3
    def test_karate(self):
        """"test modularity on Zachary's karate club"""
        graph = nx.karate_club_graph()
        part = co.best_partition(graph)
        self.assertTrue(co.modularity(part, graph) > 0.41)

        for e1, e2 in graph.edges():
            graph[e1][e2]["test_weight"] = 1.

        part_weight = co.best_partition(graph, weight="test_weight")
        self.assertAlmostEqual(co.modularity(part, graph),
                               co.modularity(part_weight, graph, "test_weight"))

        part_res_low = co.best_partition(graph, resolution=0.1)
        self.assertTrue(
            len(set(part.values())) < len(set(part_res_low.values())))
Example #4
def printStats(filename):
    '''
    Converts a JSON adjacency list into a networkx graph to calculate and
    print the graph's
      - average clustering coefficient
      - overall clustering coefficient
      - maximum diameter
      - average diameter
      - number of partitions using community.best_partition
      - modularity of community.best_partition
    '''
    g = makeGraphFromJSON(filename)

    print("Average Clustering Coefficient: %f" % nx.average_clustering(g))
    print("Overall Clustering Coefficient: %f" % nx.transitivity(g))

    connected_subgraphs = [g.subgraph(c) for c in nx.connected_components(g)]
    largest = max(connected_subgraphs, key=len)
    print("# Connected Components: %d" % len(connected_subgraphs))
    print("    Maximal Diameter: %d" % nx.diameter(largest))
    print("    Average Diameter: %f" % nx.average_shortest_path_length(largest))

    # Find the partition that maximizes modularity using the Louvain algorithm
    part = community.best_partition(g)
    print("# Partitions: %d" % (max(part.values()) + 1))
    print("Louvain Modularity: %f" % community.modularity(part, g))
def getRandomPageRanks(filename):
    Ga = nx.read_graphml(filename)

    # get the component size distribution
    cc = list(nx.connected_components(Ga))
    cc_dict = {}
    for x in range(0, len(cc)):
        try:
            cc_dict[len(cc[x])].append(x)
        except KeyError:
            cc_dict[len(cc[x])] = [x]

    isolates = list(nx.isolates(Ga))

    # build a random graph with the same node count and edge density,
    # then extract its giant component
    rg = nx.fast_gnp_random_graph(Ga.number_of_nodes(), 2.0 * Ga.number_of_edges() / (Ga.number_of_nodes() * (Ga.number_of_nodes() - 1)))
    c_rg = nx.average_clustering(rg)
    rg_cc = rg.subgraph(max(nx.connected_components(rg), key=len))
    rg_asp = nx.average_shortest_path_length(rg_cc)

    p_rg = community.best_partition(rg_cc)
    m_rg = community.modularity(p_rg, rg_cc)

    pageranks = nx.pagerank(rg)
    return pageranks
def get_communities(graph):
    # Girvan-Newman style: repeatedly remove the highest-betweenness edge and
    # keep the partition (connected components) with the highest modularity.
    betweenness = nx.edge_betweenness_centrality(graph)
    sorted_betweenness = [x[0] for x in sorted(betweenness.items(), key=lambda x: x[1], reverse=True)]
    best_partitions = []
    max_modularity = -1.0
    graph_copy = graph.copy()
    while sorted_betweenness:
        communities = [list(x) for x in nx.connected_components(graph_copy)]
        partitions = {}
        for i in range(len(communities)):
            for node in communities[i]:
                partitions[node] = i
        modularity = community.modularity(partitions, graph_copy)
        if modularity > max_modularity:
            best_partitions = communities
            max_modularity = modularity
        else:
            break
        graph_copy.remove_edge(*sorted_betweenness[0])
        del sorted_betweenness[0]
    for partition in best_partitions:
        print(sorted(partition))
    # assign a distinct random color value to each community and draw
    val_map = {}
    for partition in best_partitions:
        value = random.random()
        while value in val_map.values():
            value = random.random()
        for node in partition:
            val_map[node] = value
    values = [val_map.get(node) for node in graph.nodes()]
    nx.draw_spring(graph, node_color=values, node_size=500, with_labels=True)
    plt.savefig(sys.argv[2])
Example #7
def InitClusterAnalysis(graph):
    global comMemClean
    global comMemNames
    global comsizeClean
    global partition
    print("starting best partition algorithm (will take a while)....")
    partition = community.best_partition(graph)
    modularity = community.modularity(partition, graph)
    LogPrint("the modularity is %f" % modularity)
    if partition is not None:
        # Tally community sizes and collect the members of each community.
        for node in partition.items():
            if node[1] in comSize:
                comSize[node[1]] = comSize[node[1]] + 1
                comMem[node[1]].append(node[0])
            else:
                comSize[node[1]] = 1
                comMem[node[1]] = [node[0]]
    for cSize in comSize.items():
        if cSize[1] > 1:
            print("cSize[1]=", cSize[1])
            comsizeClean[cSize[0]] = cSize[1]
            if len(comMem[cSize[0]]) == 1:
                print("this community has only one member...", comMem[cSize[0]])
            comMemClean[cSize[0]] = comMem[cSize[0]]

    for memberIDs in comMemClean.items():
        comMemNames[memberIDs[0]] = []
        for member in memberIDs[1]:
            comMemNames[memberIDs[0]].append(utils.GetNodeName(member, graph))
def compute_best_community(original_g):
    max_modularity = -1
    total_nodes = nx.number_of_nodes(original_g)
    community_count = 1
    g = original_g
    communities = []

    # Generate all the communities: loop from taking the entire graph as 1 community to each node as a separate community
    while community_count < total_nodes:
        betweenness = nx.edge_betweenness_centrality(g)
        max_betweenness = max(betweenness.items(), key=operator.itemgetter(1))[0]
        g.remove_edge(max_betweenness[0], max_betweenness[1])
        connected_subgraphs = nx.connected_components(g)

        connected_subgraphs_list = convert_generator_list(connected_subgraphs)

        community_dict = categorize_nodes(connected_subgraphs_list)

        modularity = community.modularity(community_dict, original_g)

        if modularity > max_modularity:
            max_modularity = modularity
            communities = list(connected_subgraphs_list)
        community_count += 1

    communities = format_list(communities)

    return communities, max_modularity
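For comparison, networkx ships a generator-based implementation of the same edge-betweenness scheme. A minimal sketch that scores each split with python-louvain's modularity and keeps the best one, assuming an undirected graph G:

import community  # python-louvain
import networkx as nx
from networkx.algorithms.community import girvan_newman

def best_girvan_newman_split(G):
    best_part, best_q = None, -1.0
    for split in girvan_newman(G):
        part = {n: i for i, comp in enumerate(split) for n in comp}
        q = community.modularity(part, G)
        if q > best_q:
            best_part, best_q = part, q
        else:
            break  # modularity started to fall; keep the previous split
    return best_part, best_q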
Example #9
def get_community_assignment(in_df, graph, dendrogram):
    '''
    Utilize dendrogram to find community clusterings at every level
    available. For each hierarchy level, a new column is added to the
    returned df with the community clustering. (e.g. cid0 -> 0,0,1,2,3)

    in_df: Dataframe. Must be indexed by user_id.
    graph: Networkx Graph. Node IDs should match user_ids in dataframe
    dendrogram: List of dictionaries, each dictionary mapping user_id to
    community_id. Each dictionary should represent a level of the clustering
    hierarchy.

    return: Tuple of Dataframe with community id assignment columns added
    and dictionary mapping each level to community modularity (float)
    '''
    df = in_df.copy()

    community_modularity = {}

    for i in range(len(dendrogram)):

        partition = partition_at_level(dendrogram, i)

        # Infrequently, the community detection algorithm will exclude (?) a
        # user ID or two. Still investigating why. For now, these will be
        # placed into partition 0.
        df['cid' + str(i)] = [partition[ind] if ind in partition else 0
                              for ind in df.index]

        community_modularity[i] = modularity(partition, graph)

    return df, community_modularity
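A minimal sketch of how the `dendrogram` argument above is typically produced with python-louvain, assuming `G` is a networkx graph whose node ids are the user_ids in the dataframe:

import community  # python-louvain

dendrogram = community.generate_dendrogram(G)
for level in range(len(dendrogram)):
    part = community.partition_at_level(dendrogram, level)
    print("level", level, "->", len(set(part.values())), "communities,",
          "Q =", community.modularity(part, G))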
Example #10
def get_topics_noun_phrases(num_news, draw=False, url='http://cnn.com'):

    texts = get_news(url, num_news)

    gb = NounPhraseGraphBuilder(text_processing.clean_punctuation_and_stopwords)
    gb.load_texts(texts)
    G = gb.create_graph()
    print "Graph built"

    partition = community.best_partition(G)
    words_by_part = get_words_by_partition(partition)

    print_topics_from_partitions(G, words_by_part, 10)

    mod = community.modularity(partition,G)
    print("modularity:", mod)

    #print_topics_from_partitions(G, words_by_part, 10)
    if draw:
        values = [partition.get(node) for node in G.nodes()]
        nx.draw_spring(G, cmap = plt.get_cmap('jet'), node_color = values, node_size=30, with_labels=False)
        plt.show()

    topics = get_topics_from_partitions(G, words_by_part, 10)

    return G, topics
Example #11
def louvain_method(G):
    partition = community.best_partition(G)
    print("Graph nodes:", len(G.nodes()), "edges:", len(G.edges()))
    print("Partitions:", len(set(partition.values())),
          "Modularity:", community.modularity(partition, G.to_undirected()))
    print("\n\n")
    return partition
Example #12
    def find_best_partition(self):
        G = self.graph.copy()
        modularity = 0.0
        removed_edges = []
        partition = {}
        while True:
            betweenness = self.calculte_betweenness(G)
            max_betweenness_edges = self.get_max_betweenness_edges(betweenness)
            if len(G.edges()) == len(max_betweenness_edges):
                break

            G.remove_edges_from(max_betweenness_edges)  
            components = nx.connected_components(G)
            idx = 0
            tmp_partition = {}
            for component in components:
                for inner in list(component):
                    tmp_partition.setdefault(inner, idx)
                idx += 1
            cur_mod = community.modularity(tmp_partition, G)

            if cur_mod < modularity:
                G.add_edges_from(max_betweenness_edges)
                break
            else:
                partition = tmp_partition
            removed_edges.extend(max_betweenness_edges)
            modularity = cur_mod
        return partition, G, removed_edges
 def test_allin_is_zero(self):
     """it test that everyone in one community has a modularity of 0"""
     for i in range(self.numtest) :
         g = nx.erdos_renyi_graph(50, 0.1)
         part = dict([])
         for node in g :
             part[node] = 0
         self.assertEqual(co.modularity(part, g), 0)
Example #14
def run_Louvain(fNet, fMask, fOutImg, fOutInfo):
    '''
    A wrapper function for network community detection by the Louvain method.
    Only the largest connected component is parcellated into modules.
    
    input parameters:
          fNet:     the adjacency list filename for the network
          fMask:    the filename for the mask image. Its header
                    is used to create a modular parcellation image
          fOutImg:  the filename for the output image with modular
                    parcellation
          fOutInfo: the filename with information on modules and 
                    modularity.
    returns:
          NONE
    
    output:
          This function generates files recording modular parcellation.
               fOutImg:    Modular parcellation image
               fOutInfo:   Modular parcellation information as a numpy .npz file.
                           It includes:
                              Q:      The modularity Q
                              NMods:  The number of modules
                              ModID:  Module ID
                              NNodes: The number of nodes in a module. In the same
                                      order as ModID
    '''
    
    # loading the network data
    G = nx.read_adjlist(fNet, nodetype=int)
    # just the largest connected component
    GC = G.subgraph(max(nx.connected_components(G), key=len))
    # computing the best partition
    partition = community.best_partition(GC)
    # calculating the modularity
    Q = community.modularity(partition, GC)
    # converting the partition into arrays
    VoxInd = [int(i) for i in partition.keys()]
    ModInd = np.array(list(partition.values()))+1  # the module number starts with 1
    # calculating sizes of the modules
    NMods = np.max(ModInd)
    ModID = range(1,NMods+1)
    NNodes = []
    for iMod in ModID:
        tmpNNodes = len(np.nonzero(ModInd == iMod)[0])
        NNodes.append(tmpNNodes)
    # reading in the mask image header & data
    img_mask = nib.load(fMask)
    X_mask = img_mask.get_fdata()
    # organizing the output
    Xout = np.zeros_like(X_mask)
    VoxXYZ = np.unravel_index(VoxInd, X_mask.shape)
    Xout[VoxXYZ] = ModInd
    # writing out the image
    modimg = nib.Nifti1Image(Xout, img_mask.affine)
    nib.save(modimg, fOutImg)
    # writing out module stats
    np.savez(fOutInfo, Q=Q, NMods=NMods, ModID=ModID, NNodes=NNodes)
Example #15
def networkx_json(nodes, G):
    links = G.edges(data = True)
    groups = community.best_partition(G)
    part = reduce(lambda x, y : x if x>groups[y] else groups[y], groups, 0) + 1
    modular = community.modularity(groups,G)
    groups = { nodes[x]: groups[nodes[x]] if nodes[x] in groups else part for x in nodes }
    lsN = map(lambda x : { "id" : nodes[x], "name" : x, "group" : groups[nodes[x]] }, nodes)
    lsL = map(lambda x : { "source" : x[0], "target" : x[1], "value" : x[2]['weight'] }, links)
    return { "nodes" : lsN, "links" : lsL, "modular": round(modular, 2), "community": part + 1 }
 def test_modularity_increase(self):
     """
     Generate a dendrogram and test that modularity is always increasing
     """
     g = nx.erdos_renyi_graph(1000, 0.01)
     dendo = co.generate_dendogram(g)
     mods = [co.modularity(co.partition_at_level(dendo, level), g) for level in range(len(dendo))]
     self.assertListEqual(mods, sorted(mods))
def communities(net):
    parti = community.best_partition(net)
    modu = community.modularity(parti, net)
    com = parti.values()
    number_of_communities = max(com)+1
    hist = np.histogram(com,bins=range(0,number_of_communities+1),density=True)[0]
    values = np.square(hist)
    repartition = 1 / (sum(values))
    return (number_of_communities,'number_of_communities'),(repartition,'equivalent_number_of_communities'),(modu,'modularity_Louvain_partition')
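For reference, the 'equivalent_number_of_communities' returned above is the inverse Simpson index of the community-size distribution. With p_i the fraction of nodes in community i, the code computes

N_\mathrm{eff} = \frac{1}{\sum_i p_i^{2}}

which equals the community count when all communities have equal size and tends to 1 when a single community dominates.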
Example #18
def q2():
  print("[2] compare modularity values")
  cuisines = ["SoutheastAsian", "NorthAmerican"]
  for cuisine in cuisines:
    G = nx.read_gexf("../../data/network-analysis/%sHW4.gexf" % (cuisine))
    H = G.to_undirected(reciprocal=False)
    part = community.best_partition(H)
    mod = community.modularity(part, H)
    print("...", cuisine, "modularity =", mod)
Example #19
def gen_graph_stats(graph):
    G = nx.read_graphml(graph)
    stats = {}

    edges, nodes = 0, 0
    for e in G.edges(): edges += 1
    for n in G.nodes(): nodes += 1
    stats['Edges'] = (edges, 'The number of edges within the Graph')
    stats['Nodes'] = (nodes, 'The number of nodes within the Graph')
    print("%i edges, %i nodes" % (edges, nodes))

    # Accessing the highest degree node
    center, degree = sorted(dict(G.degree()).items(), key=itemgetter(1), reverse=True)[0]
    stats['Center Node'] = ('%s: %0.5f' % (center, degree), 'The most central node in the graph, i.e. the one with the highest degree')

    hairball = nx.subgraph(G, [x for x in nx.connected_components(G)][0])
    print("Average shortest path: %0.4f" % nx.average_shortest_path_length(hairball))
    stats['Average Shortest Path Length'] = (nx.average_shortest_path_length(hairball), '')
    # print("Center: %s" % G[center])

    # print("Shortest Path to Center: %s" % p)

    print("Degree: %0.5f" % degree)
    stats['Degree'] = (degree, 'The node degree is the number of edges adjacent to that node.')

    print("Order: %i" % G.number_of_nodes())
    stats['Order'] = (G.number_of_nodes(), 'The number of nodes in the graph.')

    print("Size: %i" % G.number_of_edges())
    stats['Size'] = (G.number_of_edges(), 'The number of edges in the graph.')

    print("Clustering: %0.5f" % nx.average_clustering(G))
    stats['Average Clustering'] = (nx.average_clustering(G), 'The average clustering coefficient for the graph.')

    print("Transitivity: %0.5f" % nx.transitivity(G))
    stats['Transitivity'] = (nx.transitivity(G), 'The fraction of all possible triangles present in the graph.')

    part = community.best_partition(G)
    # values = [part.get(node) for node in G.nodes()]

    # nx.draw_spring(G, cmap=plt.get_cmap('jet'), node_color=values, node_size=30, with_labels=False)
    # plt.show()

    mod = community.modularity(part, G)
    print("modularity: %0.5f" % mod)
    stats['Modularity'] = (mod, 'The modularity of a partition of a graph.')

    knn = nx.average_degree_connectivity(G)
    print(knn)
    stats['K Nearest Neighbors'] = (knn, 'The average degree connectivity of the graph: the average nearest-neighbor degree of nodes with degree k. For weighted graphs, an analogous measure can be computed using the weighted average neighbors degree.')

    return G, stats
Example #20
 def do_a_series_of_propagations(self):
     index = 0
     while index < self.rounds and self.flag:
         index = index + 1
         print("Label propagation round: " + str(index))
         self.do_a_propagation()
     print("")
     print("Modularity is: " +
           str(round(modularity(self.labels, self.graph), 3)) + ".")
     json_dumper(self.labels, self.args.assignment_output)
Example #21
 def evaluate(self):
     """
     Computes the modularity.
     
     :return: modularity
     """
     modularity = community.modularity(self.clusters_labels, self.Graph)
     self.modularity = modularity
     #x = internalValidation(self.adj_matrix, labels)
     return self.modularity
 def test_range(self):
     """test that modularity is always between -1 and 1"""
     for _ in range(self.number_of_tests):
         graph = nx.erdos_renyi_graph(50, 0.1)
         part = dict([])
         for node in graph:
              part[node] = random.randint(0, self.number_of_tests // 10)
         mod = co.modularity(part, graph)
         self.assertGreaterEqual(mod, -1)
         self.assertLessEqual(mod, 1)
Example #23
def q3():
  print("[4] compare community stats before and after edge thresholding")
  G = nx.read_gexf("../../data/network-analysis/complements.gexf")
  H = G.to_undirected()
  part_before = community.best_partition(H)
  num_comm_before = len(set([x[1] for x in part_before.items()]))
  mod_before = community.modularity(part_before, H)
  print("before thresholding, #-communities=%d, modularity=%f" %
    (num_comm_before, mod_before))
  edges_before = list(H.edges(data=True))
  for edge in edges_before:
    weight = edge[2]["weight"]
    if weight < 0.2:
      H.remove_edge(edge[0], edge[1])
  part_after = community.best_partition(H)
  num_comm_after = len(set([x[1] for x in part_after.items()]))
  mod_after = community.modularity(part_after, H)
  print("after thresholding, #-communities=%d, modularity=%f" %
    (num_comm_after, mod_after))
Example #24
def run_louvain(g_sym, res, skeleton_labels):
    out_dict = cm.best_partition(g_sym, resolution=res)
    modularity = cm.modularity(out_dict, g_sym)
    partition = np.array(itemgetter(*skeleton_labels)(out_dict))
    part_unique, part_count = np.unique(partition, return_counts=True)
    for uni, count in zip(part_unique, part_count):
        if count < 3:
            inds = np.where(partition == uni)[0]
            partition[inds] = -1
    return partition, modularity
 def test_range(self) :
     """test that modularity is always between -1 and 1"""
     for i in range(self.numtest) :
         g = nx.erdos_renyi_graph(50, 0.1)
         part = dict([])
         for node in g :
          part[node] = random.randint(0, self.numtest // 10)
         mod = co.modularity(part, g)
         self.assertGreaterEqual(mod, -1)
         self.assertLessEqual(mod, 1)
def calc_modularity(G, nodal_partition):
    '''
    A function that calculates modularity from the best partition
    of a graph using the louvain method
    '''
    import community

    modularity = community.modularity(nodal_partition, G)

    return modularity
Example #27
def modularity(G, assignments=None, best_partition=False):
    if best_partition:
        part = community.best_partition(G)
    elif assignments:
        part = dict(zip(G.nodes(), assignments))
    else:
        # get assignments from Graph
        part = dict([(n[0], int(n[1]['partition'])) for n in G.nodes(data=True)])
    mod = community.modularity(part, G)
    return mod
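A usage sketch for the wrapper above (the graph and assignment values are illustrative): it accepts an explicit assignment list, per-node 'partition' attributes, or falls back to running Louvain itself.

import networkx as nx

G = nx.karate_club_graph()
q_louvain = modularity(G, best_partition=True)              # run Louvain internally
q_explicit = modularity(G, assignments=[n % 2 for n in G])  # follows G.nodes() order
print(q_louvain, q_explicit)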
Example #29
def print_communities(partition, G):
    data = get_data(partition)
    communities = pd.DataFrame(data).groupby('community')
    for key, item in communities:
        print(communities.get_group(key), "\n")

    global nc
    nc = len(communities)
    modularity = community.modularity(partition, G)
    print("Number of Communities: ", nc, "\nModularity: ", modularity)
def cluster_graph(H, resolution, weight='weight'):
    print("------------Louvain------------------")
    results_df = pd.DataFrame()
    results_df.index.name = 'Timepoint'
    for i in range(n_year):
        start = timeit.default_timer()
        if i == 0:
            cluster_dict = {}
        num_clusters_last = len(set(cluster_dict.values()))
        Graph = H[i]
        num_nodes = len(Graph.nodes)
        partition_dict = {}
        num_increment = 0
        for node in Graph.nodes:
            if node in cluster_dict:
                partition_dict[node] = cluster_dict[node]
            else:
                partition_dict[node] = num_clusters_last + num_increment
                num_increment += 1
        cluster_dict = community.best_partition(Graph,
                                                resolution=resolution,
                                                partition=partition_dict,
                                                weight=weight)
        num_clusters = len(set(cluster_dict.values()))
        nx.set_node_attributes(Graph, cluster_dict, 'Louvain cluster')

        stop = timeit.default_timer()
        cal_time = stop - start

        num_edges = len(Graph.edges)
        modularity = community.modularity(cluster_dict, Graph)

        results_df.loc[(i + min_year), 'Resolution'] = resolution
        results_df.loc[(i + min_year), 'Num_Clusters'] = num_clusters
        results_df.loc[(i + min_year), 'Modularity'] = modularity
        results_df.loc[(i + min_year), 'Num_Nodes'] = num_nodes
        results_df.loc[(i + min_year), 'Num_Edges'] = num_edges
        results_df.loc[(i + min_year), 'Calculation_Time'] = cal_time
        #display(results_df.loc[(i+min_year):(i+min_year+1),:])
        print('Year: {:4d}'.format(i + min_year),
              "| {:6d} nodes ".format(num_nodes),
              "| {: 5d} clusters".format(num_clusters),
              "| Modularity: {:.6f}".format(modularity),
              " | Calculation time: {: 6.2f} sec".format(cal_time))

    #w = pd.ExcelWriter(graph_dir + 'Clustering_Results' + desc + '.xlsx')
    #sheetname = 'Clustering_Results'
    #results_df.to_excel(w, sheetname)
    #w.sheets[sheetname].set_column(0, 7, 10)
    #w.save()
    results_df.to_csv(graph_dir + 'Clustering_Results_ver10' + desc + '.csv')
    display(results_df)

    return H
Example #31
def get_rf_metric_cutoff(G_origin,
                         weight="weight",
                         cutoff_step=0.025,
                         drop_threshold=0.01):
    """Get good clustering cutoff points for Ricci flow metric by detect the change of modularity while removing edges.

    Parameters
    ----------
    G_origin : NetworkX graph
        A graph with "weight" as Ricci flow metric to cut.
    weight : str
        The edge weight used as Ricci flow metric. (Default value = "weight")
    cutoff_step : float
        The step size to find the good cutoff points.
    drop_threshold : float
        Minimum relative drop required to be considered a drop for good_cut.

    Returns
    -------
    good_cuts : list of float
        A list of possible cutoff points; usually the first one is used as the best cut.
    """

    G = G_origin.copy()
    modularity = []
    maxw = max(nx.get_edge_attributes(G, weight).values())
    cutoff_range = np.arange(maxw, 1, -cutoff_step)

    for cutoff in cutoff_range:
        G = cut_graph_by_cutoff(G, cutoff, weight=weight)
        # Get connected component after cut as clustering
        clustering = {
            c: idx
            for idx, comp in enumerate(nx.connected_components(G))
            for c in comp
        }
        # Compute modularity
        modularity.append(community_louvain.modularity(clustering, G, weight))

    good_cuts = []
    mod_last = modularity[-1]

    # check drop from 1 -> maxw
    for i in range(len(modularity) - 1, 0, -1):
        mod_now = modularity[i]
        if mod_last > mod_now > 1e-4 and abs(
                mod_last - mod_now) / mod_last > drop_threshold:
            logger.trace(
                "Cut detected: cut:%f, diff:%f, mod_now:%f, mod_last:%f" %
                (cutoff_range[i + 1], mod_last - mod_now, mod_now, mod_last))
            good_cuts.append(cutoff_range[i + 1])
        mod_last = mod_now

    return good_cuts
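A minimal usage sketch, assuming `G` already carries a Ricci-flow metric in its "weight" edge attribute (as produced by the surrounding library) and using the cut_graph_by_cutoff helper referenced above:

# find candidate cutoffs, then cut the graph at the first (best) one
cuts = get_rf_metric_cutoff(G, weight="weight", cutoff_step=0.025, drop_threshold=0.01)
if cuts:
    G_cut = cut_graph_by_cutoff(G.copy(), cuts[0], weight="weight")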
    def test_karate(self):
        """"test modularity on Zachary's karate club"""
        graph = nx.karate_club_graph()
        part = co.best_partition(graph, random_state=0)
        self.assertTrue(co.modularity(part, graph) > 0.41)

        for e1, e2 in graph.edges():
            graph[e1][e2]["test_weight"] = 1.

        part_weight = co.best_partition(graph,
                                        weight="test_weight",
                                        random_state=0)
        self.assertAlmostEqual(co.modularity(part, graph),
                               co.modularity(part_weight, graph,
                                             "test_weight"),
                               places=2)

        part_res_low = co.best_partition(graph, resolution=0.1)
        self.assertTrue(
            len(set(part.values())) < len(set(part_res_low.values())))
Example #33
def test_louvain_with_edgevals(graph_file):
    gc.collect()

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)
    cu_parts, cu_mod = cugraph_call(cu_M, edgevals=True)
    nx_parts = networkx_call(M)
    # Calculating modularity scores for comparison
    Gnx = nx.from_pandas_edgelist(
        M, source="0", target="1", edge_attr="weight", create_using=nx.Graph()
    )
    cu_map = {0: 0}
    for i in range(len(cu_parts)):
        cu_map[cu_parts["vertex"][i]] = cu_parts["partition"][i]
    assert set(nx_parts.keys()) == set(cu_map.keys())
    cu_mod_nx = community.modularity(cu_map, Gnx)
    nx_mod = community.modularity(nx_parts, Gnx)
    assert len(cu_parts) == len(nx_parts)
    assert cu_mod > (0.82 * nx_mod)
    assert abs(cu_mod - cu_mod_nx) < 0.0001
Example #34
def test_louvain_with_edgevals(graph_file):
    M = read_mtx_file(graph_file + '.mtx')
    cu_M = read_csv_file(graph_file + '.csv')
    cu_parts, cu_mod = cugraph_call(cu_M, edgevals=True)
    nx_parts = networkx_call(M)

    # Calculating modularity scores for comparison
    Gnx = nx.Graph(M)
    cu_map = {0: 0}
    for i in range(len(cu_parts)):
        cu_map[cu_parts['vertex'][i]] = cu_parts['partition'][i]
    assert set(nx_parts.keys()) == set(cu_map.keys())
    cu_mod_nx = community.modularity(cu_map, Gnx)
    nx_mod = community.modularity(nx_parts, Gnx)
    assert len(cu_parts) == len(nx_parts)
    assert cu_mod > (.82 * nx_mod)
    print(cu_mod)
    print(cu_mod_nx)
    print(nx_mod)
    assert abs(cu_mod - cu_mod_nx) < .0001
Example #35
 def test_modularity_increase(self):
     """
     Generate a dendrogram and test that modularity is always increasing
     """
     graph = nx.erdos_renyi_graph(1000, 0.01)
     dendo = co.generate_dendrogram(graph)
     mods = [
         co.modularity(co.partition_at_level(dendo, level), graph)
         for level in range(len(dendo))
     ]
     self.assertListEqual(mods, sorted(mods))
Example #36
 def find_communities(self, G):
     tab = Table()
     start = timeit.default_timer()
     communities = community.best_partition(G)
     M = community.modularity(communities, G)
     stop = timeit.default_timer()
     tab.from_tuples([(len(set(communities.values())), M, (stop - start))],
                     columns=['Unique', 'M', 'Time (sec)'])
     tab.sort_values(by='M', ascending=False)
     tab.display()
     return communities
def add_louvain_communities():
    partition = community.best_partition(Graph, random_state=42)
    nodes_info['community'] = -1
    for node in partition:
        nodes_info.iloc[int(node),
                        nodes_info.columns.get_loc('community')] = int(
                            partition.get(node))
    print("Community partition modularity: ",
          round(community.modularity(partition, Graph), 3))
    print("Number of communities: ", len(set(partition.values())))
    print("-----------")
Example #38
    def compute_modular(self):
        '''
        :return: Modularity values lie in the range [-1/2, 1), so negative
                 values are possible; the literature suggests that Q between
                 0.3 and 0.7 indicates good clustering.
        '''

        G = nx.Graph()
        allneurons = self.getNeurons()
        nids = [n.id for n in allneurons]
        G.add_nodes_from(nids)

        synapses = self.getSynapses()
        for s in synapses:
            G.add_edge(s.fromId, s.toId)

        part = community.best_partition(G)
        return (community.modularity(part, G), part)

        # Alternative (unused): hierarchical clustering on the Euclidean
        # distance matrix between points:
        '''
        points = self.getConnectionMarix(returntype=list, valuetype='01')
        disMat = sch.distance.pdist(points, 'euclidean')
        # perform hierarchical clustering:
        Z = sch.linkage(disMat, method='average')
        # obtain the clustering from linkage matrix Z:
        cluster = sch.fcluster(Z, t=0, criterion='inconsistent')
        print('module partition of network ' + str(self.id) + ': ' + str(cluster))
        # compute modularity, based on "Neural Modularity Helps Organisms
        # Evolve to Learn New Skills without Forgetting Old Skills"
        '''

        # 1. build the community matrix
        '''
        m = len(set(cluster))  # number of clusters
        if m <= 1:
            return 0.
        n = len(points)
        points = np.array(points)
        lin = list(map(np.sum, points))
        col = list(map(np.sum, zip(*points)))
        sum = 0.
        for i in range(n):
            for j in range(n):
                if cluster[i] != cluster[j]:
                    continue
                ki_in = col[i]
                kj_out = lin[j]
                sum += points[i][j] - ki_in * kj_out / (2 * m)

        Q = sum / (2 * m)
        return Q
        '''
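The value range quoted in the docstring refers to Newman-Girvan modularity, which is what community.modularity computes:

Q = \frac{1}{2m} \sum_{i,j} \left[ A_{ij} - \frac{k_i k_j}{2m} \right] \delta(c_i, c_j)

where A is the adjacency matrix, k_i the degree of node i, m the number of edges, and \delta(c_i, c_j) is 1 when nodes i and j share a community.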
Example #39
File: main.py Project: FozAhm/comp596
def louvain(G, number_of_nodes, matlab_bs=False):

    print('\nLouvain Algorithm')
    louvain = community.best_partition(G)
    #print('Louvain Communities:\n', louvain)
    louvain_labels_predicted, num_of_communities_louvain = get_predicted_label_from_louvain(louvain, number_of_nodes, matlab_bs)
    print('Number of Communities with Louvain:', num_of_communities_louvain)
    print('Modularity:', community.modularity(louvain, G))
    #print('Louvain Truth Labels:\n', louvain_labels_predicted)

    return louvain_labels_predicted
Example #40
 def do_a_series_of_propagations(self):
     """
     Doing propagations until convergence or reaching time budget.
     """
     index = 0
     while index < self.rounds and self.flag:
         index = index + 1
         print("\nLabel propagation round: " + str(index)+".\n")
         self.do_a_propagation()
     print("")
     print("Modularity is: "+  str(round(modularity(self.labels,self.graph),3)) + ".\n")
     json_dumper(self.labels, self.args.assignment_output)
Example #41
def sim_community_maker2(dismat,threshold,tags=None):
    adjmat = dismat.copy()
    np.fill_diagonal(adjmat,
                     np.min(dismat))  # Set the diagonal elements to a small value so that they won't be zeroed out
    adjmat = adjmat.reshape((-1,))
    adjmat[adjmat > threshold] = 0
    adjmat = adjmat.reshape(dismat.shape)

    G = make_graph(adjmat,labels=tags)
    partition = partition_calculate1(G)
    F = community.modularity(partition, G)
    return F
Example #42
def calcuModularity(listResult,edgeList):
    '''
    Calculate Modularity through networkx modularity
    https://programminghistorian.org/en/lessons/exploring-and-analyzing-network-data-with-python
    '''
    G = nx.Graph()
    G.add_weighted_edges_from(edgeList)
    partition={}
    for item in range(len(listResult)):
        partition[item] = listResult[item]
    global_modularity = community.modularity(partition, G)
    return global_modularity
def get_comm_dict_and_partition(g):
    partition = community.best_partition(g)
    print("Louvain Modularity: ", community.modularity(partition, g))
    print("Louvain Partition: ", partition)

    # invert the node -> community map into a community -> node-list dict
    reverse_dict = {}
    for node in partition:
        if partition[node] not in reverse_dict:
            reverse_dict[partition[node]] = []
        reverse_dict[partition[node]].append(node)
    print('Node List Dict:', reverse_dict)
    return reverse_dict, partition
def Mod(G, usebest=True, l=1):
    D = G.to_undirected()
    dendo = community.generate_dendrogram(D, None)
    if usebest:
        level = len(dendo) - 1
    else:
        level = l
    partition = community.partition_at_level(dendo, level)
    mod = community.modularity(partition, D)
    for n in G:
        G.nodes[n]['m'] = partition[n]
    return mod
Example #45
def neural_modularity_calculator(graph, embedding, means):
    """
    Function to calculate the GEMSEC cluster assignments.
    """
    assignments = {}
    for node in graph.nodes():
        positions = means - embedding[node, :]
        values = np.sum(np.square(positions), axis=1)
        index = np.argmin(values)
        assignments[int(node)] = int(index)
    modularity = community.modularity(assignments, graph)
    return modularity, assignments
Example #46
def classical_modularity_calculator(graph, embedding, args):
    """
    Function to calculate the DeepWalk cluster centers and assignments.
    """
    kmeans = KMeans(n_clusters=args.cluster_number, random_state=0,
                    n_init=1).fit(embedding)
    assignments = {
        i: int(kmeans.labels_[i])
        for i in range(0, embedding.shape[0])
    }
    modularity = community.modularity(assignments, graph)
    return modularity, assignments
Example #47
def test_louvain(graph_file):
    gc.collect()

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)
    cu_parts, cu_mod = cugraph_call(cu_M)
    nx_parts = networkx_call(M)

    # Calculating modularity scores for comparison
    Gnx = nx.from_pandas_edgelist(M, source='0', target='1',
                                  edge_attr='weight', create_using=nx.Graph())
    cu_map = {0: 0}
    for i in range(len(cu_parts)):
        cu_map[cu_parts['vertex'][i]] = cu_parts['partition'][i]
    assert set(nx_parts.keys()) == set(cu_map.keys())

    cu_mod_nx = community.modularity(cu_map, Gnx)
    nx_mod = community.modularity(nx_parts, Gnx)
    assert len(cu_parts) == len(nx_parts)
    assert cu_mod > (.82 * nx_mod)
    assert abs(cu_mod - cu_mod_nx) < .0001
Example #48
 def modularity(self):
     part = self.get_partition()
     if not part:
         return None
     g = self.getG()
     try:
         modul = pylouvain.modularity(part, g)
     except NameError:
         self.log.error("python-louvain library is not installed; "
                        "modularity can't be computed")
         modul = None
     return modul
Example #49
def calc_graph(matrix):
    thresholds = [90, 85, 80, 75]
    glob = np.zeros((1, 4))
    loc = np.zeros((1, 4))
    Q = np.zeros((1, 4))
    Ch = np.zeros((1, 4))
    Ph = np.zeros((1, 4))

    data = np.zeros((1, 5))

    # Run graph measure analysis
    for index, threshold in enumerate(thresholds):
        graph = mat2graph_threshold(matrix, threshold)

        # Calculating global and average local efficiency
        glob[0, index] = nx.global_efficiency(graph)
        loc[0, index] = nx.local_efficiency(graph)

        # Community detection and modularity (1.25 )
        part = community.best_partition(graph, weight='1.25')
        Q[0, index] = community.modularity(part, graph)

        # Calculating connector and provincial hubs
        Z = module_degree_zscore(matrix, part)
        P = participation_coefficient(matrix, part)
        # connector hubs
        ch = np.zeros(matrix.shape[0])
        for i in range(len(ch)):
            if P[i] > 0.8 and Z[i] < 1.5:
                ch[i] = 1.0
        Ch[0, index] = np.sum(ch)

        # provincial hubs
        ph = np.zeros(matrix.shape[0])
        for i in range(len(ph)):
            if P[i] <= 0.3 and Z[i] >= 1.5:
                ph[i] = 1
        Ph[0, index] = np.sum(ph)

    # Averaging over each graph threshold
    meanglob = np.mean(glob)
    meanloc = np.mean(loc)
    meanQ = np.mean(Q)
    meanCh = np.mean(Ch)
    meanPh = np.mean(Ph)
    data[0, 0] = meanglob
    data[0, 1] = meanloc
    data[0, 2] = meanQ
    data[0, 3] = meanCh
    data[0, 4] = meanPh
    return (data)
def lp(direction, graph):
    print('lp.....')

    print('Running query for community detection.....')
    t = datetime.now()
    louvain_query = graph.run('''
        CALL algo.labelPropagation.stream("User", null,
        {direction: ''' + '"' + direction + '"' + ''', iterations: 10})
        YIELD nodeId, label
        RETURN algo.getNodeById(nodeId) as node, label
        ''').data()
    time_taken = datetime.now() - t

    print('Converting results.....')
    results = defaultdict(list)
    for item in louvain_query:
        results[str(item['label'])].append(str(item['node']['id']))

    print('Get Neo4jGraph.....')
    graph_query = graph.run('''
        MATCH (n:User)-[r]->(m:User)
        RETURN n.id,TYPE(r),m.id 
    ''').to_data_frame()

    print('Convert graph to nx graph.....')
    nx_graph = nx.from_pandas_edgelist(
        df=graph_query,
        source='n.id',
        target='m.id',
        edge_attr=True,
        create_using=nx.MultiGraph(name='Travian_Graph'))

    modularity = None
    print('Computing modularity.....')
    try:
        modularity = nx.algorithms.community.modularity(
            nx_graph, results.values())
    except Exception as e:
        print(repr(e))
    try:
        # python-louvain expects a node -> community dict, not community lists
        partition = {node: label for label, nodes in results.items() for node in nodes}
        modularity = community.modularity(partition, nx_graph)
    except Exception as e:
        print(repr(e))

    extract_results('neo_lp' + direction, nx_graph, results, time_taken,
                    modularity)

    values = calc_color_values(nx_graph, results)

    visualize('Label Propagation ' + direction, nx_graph, values)

    print('lp finished.....')
Example #51
def calc_community_modularity(bp, g):
    """
    calc_community_modularity(bp,g)
    calculate modularity using community
    :param bp:  best partition
    :param g:   source graph
    :return:    ---
    """
    logging.info(cs_ref, 'modularity from Louvain')
    modularity_info = "\tModularity : for best partition is : " + str(cmty.modularity(bp, g))
    with open(dest_file, "a") as dat_file:
        dat_file.write("\n" + modularity_info)
    print(modularity_info)
def graph_metric(G, metric):
    if metric == 'modularity':
        partition = community.best_partition(G)
        mod = community.modularity(partition, G)

        return mod
    if metric == 'global_clustering':
        if G.size() == 0:
            cluster_coeff = 0
        else:
            #			cluster_coeff = nx.algorithms.cluster.average_clustering(G)
            cluster_coeff = nx.transitivity(G)
        return cluster_coeff
    def community_detection(self):
        """
        Detects characteristics related to communities of graph and writes them 
        down to the 'Communities.txt' file. It also compares these characteristics
        with a random graph of the same node-size and edge-size.
        """
        partitionx = communityx.greedy_modularity_communities(self.G)
        """Modularity & Coverage"""
        modularity = community.modularity(self.best_parts, self.G)  #XXX
        coverage = communityx.coverage(self.G, partitionx)
        """in the corresponding random graph"""
        # H = nx.gnm_random_graph(self.G.number_of_nodes(),self.G.number_of_edges())
        H = nx.configuration_model([d for v, d in self.G.degree()])

        part = community.best_partition(H)  #XXX
        part2 = communityx.greedy_modularity_communities(H)
        modularity_rand = community.modularity(part, H)
        coverage_rand = communityx.coverage(H, part2)
        """Write File"""
        title = 'Communities.txt'
        com_file = open(self.path + title, 'w')
        com_file.write('Modularity:' + '\n')
        com_file.write(str(modularity) + '\n')
        com_file.write('Coverage' + '\n')
        com_file.write(str(coverage) + '\n')
        com_file.write('The corresponding random graph has modularity:' + '\n')
        com_file.write(str(modularity_rand) + '\n')
        com_file.write('The corresponding random graph has coverage:' + '\n')
        com_file.write(str(coverage_rand))
        com_file.write('\n')
        com_file.write('number of communities:' + '\n')
        com_file.write(str(max(self.best_parts.values()) + 1) + '\n')  #XXX
        # com_file.write(str(max(self.best_parts_x.values())+1)+'\n')
        com_file.write('\n')
        com_file.write(
            'The coverage of a partition is the ratio of the number of intra-community edges to the total number of edges in the graph.'
        )
        com_file.close()
        return modularity, coverage, modularity_rand, coverage_rand
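In newer networkx releases (>= 2.8), coverage() is deprecated in favor of partition_quality(), which returns coverage and performance in one call. A minimal sketch of the replacement, assuming an undirected graph G:

from networkx.algorithms.community import greedy_modularity_communities, partition_quality

parts = greedy_modularity_communities(G)
cov, perf = partition_quality(G, parts)  # (coverage, performance)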
Example #54
 def get_modularity_value(self, a_grn, louvain=False):
     if isinstance(a_grn, list):
         a_grn = self.generate_directed_grn(a_grn)
     if louvain:
         modularity_partition = community.best_partition(
             a_grn.to_undirected())
     else:
         node_no = len(a_grn.nodes())
         modularity_partition = {}
         for i in range(node_no):
             modularity_partition[i] = int(i / 5)
     return community.modularity(modularity_partition,
                                 a_grn.to_undirected())
Example #55
def graph_analysis(G):
    """Analyze graph. Returns a dictionary with useful data.
    Cannot deal with weights below 0, so all negative weights are set to 0."""
    MG                      = main_graph(G)
    for a,b in MG.edges():
        w = MG[a][b]['weight']
        if w < 0:
            MG[a][b]['weight'] = 0
    partition = community.best_partition(MG)
    return { 'num_clusters':    max(partition.values()) + 1,  # community ids start at 0
             'modularity':      community.modularity(partition,MG),
             'size':            len(MG.nodes()),
             'partition':       partition}
Example #56
def calc_modularity(G):
    
    import numpy as np
    import networkx as nx
    import community
    
    # Binarize both of the graphs
    for u,v,d in G.edges(data=True):
        d['weight']=1
            
    # Compute the best partition based on the threshold you've specified in cost
    partition = community.best_partition(G)

    modularity = community.modularity(partition, G)    
    
    return modularity
 def test_disjoint_clique(self) :
     """"
     A group of num_clique of size size_clique disjoint, should maximize the modularity
     and have a modularity of 1 - 1/ num_clique
     """
     for num_test in range(self.numtest) :
         size_clique = random.randint(5, 20)
         num_clique = random.randint(5, 20)
         g = nx.Graph()
         for i in range(num_clique) :
             clique_i = nx.complete_graph(size_clique)
             g = nx.union(g, clique_i, rename=("",str(i)+"_"))
         part = dict([])
         for node in g :
             part[node] = node.split("_")[0].strip()
         mod = co.modularity(part, g)
         self.assertAlmostEqual(mod, 1. - 1./float(num_clique),  msg = "Num clique: " + str(num_clique) + " size_clique: " + str(size_clique))
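The expected value follows directly from the definition of modularity: with k = num_clique equal-sized cliques, each community holds e_c = m/k of the m edges and d_c = 2m/k of the total degree, so

Q = \sum_{c=1}^{k} \left[ \frac{e_c}{m} - \left( \frac{d_c}{2m} \right)^{2} \right] = k \left( \frac{1}{k} - \frac{1}{k^{2}} \right) = 1 - \frac{1}{k}.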
Example #58
File: crblh.py Project: cerebis/crblh
def decompose_graph(g, max_mod, decomposed=None):
    if decomposed is None:
        decomposed = []

    p = com.best_partition(g)
    if com.modularity(p, g) < max_mod:
        decomposed.append(g)
    else:
        # split communities and recurse on each part
        part_ids = np.unique(list(p.values()))
        for pi in part_ids:
            gi = g.copy()
            for n in g.nodes():
                if p[n] != pi:  # remove all nodes not in this partition
                    gi.remove_node(n)
            decompose_graph(gi, max_mod, decomposed)
    return decomposed
Example #59
File: tools.py Project: kasev/WordNets
def print_communities(G, sstt):
    part = comm.best_partition(G)
    print('Number of communities of %s = %i' % (sstt, max(part.values()) + 1))
    print('Community partition of %s:' % (sstt))
    parLis = []
    partdi = {}
    # invert node -> community into community -> node list
    for i, k in part.items():
        if k not in partdi:
            partdi[k] = [i]
        else:
            partdi[k].append(i)
    for i, k in partdi.items():
        parLis.append(k)
    print(parLis)
    nodper = {i: i for i in G.nodes()}
    print('Community modularity of %s = %.4f' % (sstt, comm.modularity(part, G)))
    return part, nodper