Esempio n. 1
0
def community_dection_graph(MSTgraph, num_comms=20, mst=True):
    """Collect the first ``num_comms`` Girvan-Newman partitions of a graph.

    Parameters
    ----------
    MSTgraph : object
        When ``mst`` is true, an object whose ``graph`` attribute holds the
        graph to split; otherwise the graph itself.
    num_comms : int
        Maximum number of partition levels taken from the generator.
    mst : bool
        Selects whether ``MSTgraph.graph`` or ``MSTgraph`` is partitioned.

    Returns
    -------
    list
        One tuple per level; each tuple holds the sorted node list of every
        community at that level.
    """
    # Unwrap the underlying graph only when a wrapper object was passed in.
    target = MSTgraph.graph if mst else MSTgraph
    levels = community.girvan_newman(
        target, most_valuable_edge=most_central_edge)
    return [
        tuple(sorted(members) for members in level)
        for level in itertools.islice(levels, num_comms)
    ]
Esempio n. 2
0
def gn_time(G):
    """Return the mean wall-clock time of ten Girvan-Newman runs on ``G``.

    Each run walks the partition hierarchy, scores every level with
    ``modularity`` and stops early once five consecutive levels fail to beat
    the best score seen so far.
    """

    def most_central_edge(graph):
        # Edge with the highest weighted betweenness centrality.
        scores = betweenness(graph, weight='weight')
        return max(scores, key=scores.get)

    # One elapsed-time sample per run.
    durations = []

    for _ in tqdm(range(10)):
        started = time.time()

        # Weighted graphs use the weight-aware edge selector.
        if nx.is_weighted(G):
            solutions = girvan_newman(G, most_valuable_edge=most_central_edge)
        else:
            solutions = girvan_newman(G)

        # Upper bound on the number of levels examined: one per edge.
        level_budget = len(G.edges)

        # Number of communities -> modularity of that level.
        modularity_scores = {}
        best_score = 0
        # Levels seen since the last improvement (stopping criterion).
        stale_levels = 0

        for level in itertools.islice(solutions, level_budget):
            solution = [sorted(members) for members in level]
            score = modularity(G, solution)
            modularity_scores[len(solution)] = score
            if score > best_score:
                # Remember the community structure with the highest score.
                best_structure = list(solution)
                best_score = score
                stale_levels = 0
                continue
            stale_levels += 1
            if stale_levels == 5:
                break

        durations.append(time.time() - started)

    return np.mean(durations)
Esempio n. 3
0
def get_girvan_newman(graph, num_components):
    """Return the last of the first ``num_components`` Girvan-Newman levels.

    An empty tuple is returned when no level is consumed (for instance when
    ``num_components`` is 0).
    """
    levels = itertools.islice(community.girvan_newman(graph), num_components)
    coms_gn = ()
    # Drain the slice; the loop variable ends up holding the final level.
    for coms_gn in levels:
        pass
    return coms_gn
Esempio n. 4
0
def draw_community(g, position):
    """Draw ``g`` and colour the communities of its fourth Girvan-Newman level.

    Parameters
    ----------
    g : graph
        Graph handed to ``community.girvan_newman`` and to the drawing calls.
    position : mapping
        Node positions, passed unchanged to every drawing call so nodes drawn
        in separate passes stay in place.

    The time spent computing the four levels is printed after ``plt.show()``
    returns.
    """
    # time.clock() no longer exists on Python 3.8+; prefer perf_counter and
    # fall back to clock only on interpreters that lack it.
    timer = getattr(time, 'perf_counter', None) or time.clock

    start_time = timer()
    communities_generator = community.girvan_newman(g)
    # The generator is lazy: the real work happens inside next(), so the
    # levels are pulled within the timed section. The first three levels are
    # discarded; only the fourth is drawn.
    for _ in range(3):
        next(communities_generator)
    fourth_level = next(communities_generator)
    end_time = timer()

    nx.draw(g, position, edge_color='k', with_labels=True,
            font_weight='light', node_size=280, width=0.9)
    colors = ['r', 'g', 'b', 'c', 'm', 'y']

    plt.title("Graph generated with GN algorithm")
    for index, c in enumerate(fourth_level):
        # Wrap around the palette so more than six communities cannot raise
        # an IndexError.
        nx.draw_networkx_nodes(g, position, nodelist=list(c),
                               node_color=colors[index % len(colors)])

    plt.show()
    print(end_time - start_time)
def get_communities(graph_object, iterations=5, print_communities=True):
    """
    Find communities in a graph with the Girvan Newman method.

    graph_object: Graph to detect communities in
    iterations: How many Girvan Newman levels to advance (at least 1).
    The more iterations, the more aggressive the algorithm is at breaking out
    communities into smaller chunks. If the hierarchy runs out of levels
    first, the deepest available level is used.
    print_communities: When true, print every community with more than
    5 members.

    return: Community map (dict) from node to community id. Members of a
    community with more than 5 nodes get that community's 1-based position
    in the partition; members of smaller communities get 0.

    raises: ValueError if iterations is smaller than 1.
    """
    if iterations < 1:
        raise ValueError("iterations must be at least 1")

    community_generator = co.girvan_newman(graph_object)
    communities = ()
    for _ in range(iterations):
        try:
            communities = next(community_generator)
        except StopIteration:
            # No deeper level exists; keep the last partition obtained.
            break

    communities_map = {}
    for ix, community_list in enumerate(communities):
        is_large = len(community_list) > 5
        if is_large and print_communities:
            print(community_list)
        node_id = ix + 1 if is_large else 0
        for node in community_list:
            communities_map[node] = node_id

    return communities_map
Esempio n. 6
0
def girvan_newman_partition(graph):
    """Label the output of ``girvan_newman`` and pass it to ``draw_graph``.

    Every element found inside the i-th item produced by ``girvan_newman``
    is mapped, by its string form, to ``i``; later items overwrite earlier
    ones. Nothing is returned.
    """
    # NOTE(review): if ``girvan_newman`` is the networkx generator, each item
    # is a whole partition level and the keys are stringified node sets --
    # confirm which implementation is in scope here.
    labels = {
        str(member): index
        for index, part in enumerate(girvan_newman(graph))
        for member in part
    }
    draw_graph(labels, "Girvan Newman")
Esempio n. 7
0
def best_split(wordPairs):
    """
    Build a graph from word pairs and return its best community partition.

    Every Girvan-Newman level is scored with the harmonic mean of its
    performance and coverage; the highest-scoring level wins.

    :param wordPairs: iterable of items whose first element is a
        ``(word_a, word_b)`` pair; each pair becomes one graph edge
    :return: (level of partition that gives the best score, best score,
        best partition); ``(None, 0, None)`` when no level scores above zero
    """
    from networkx.algorithms import community
    import networkx as nx

    try:
        # networkx >= 2.6 reports both measures through one helper.
        from networkx.algorithms.community.quality import partition_quality

        def quality(graph, partition):
            cov, perf = partition_quality(graph, partition)
            return perf, cov
    except ImportError:
        # Older networkx only ships the separate helpers.
        from networkx.algorithms.community.quality import performance, coverage

        def quality(graph, partition):
            return performance(graph, partition), coverage(graph, partition)

    Graph = nx.Graph()
    Graph.add_edges_from((pair[0][0], pair[0][1]) for pair in wordPairs)

    max_pc = 0
    max_index = None
    best_communities = None
    for i, communities in enumerate(community.girvan_newman(Graph)):
        p, c = quality(Graph, communities)
        total = p + c
        # Both measures can be zero (e.g. a single edge split into two
        # singletons); score that level 0 instead of dividing by zero.
        score = 2 * p * c / total if total else 0
        if score > max_pc:
            max_index = i
            max_pc = score
            best_communities = communities
    return (max_index, max_pc, best_communities)
Esempio n. 8
0
    def communityCalculation(self, GRAPH, reverseOrd):
        """Rank every community seen in the Girvan-Newman hierarchy by level.

        The full node set is recorded at level 0 and each community of the
        i-th partition (counting from 1) at level ``i``; a community that
        shows up again at a deeper level keeps the deeper level. At most
        ``GRAPH.number_of_nodes()`` levels are examined.

        Returns a list of ``(frozenset_of_nodes, level)`` pairs sorted by
        level, in descending order when ``reverseOrd`` is true.
        """
        started = time.time()
        communities_generator = community.girvan_newman(GRAPH)
        print("Calculating the communities in ...." +
              str(time.time() - started))

        root = frozenset(GRAPH.nodes)
        allCommunities = {root}
        communityLevel = {root: 0}

        levels = itertools.islice(communities_generator,
                                  GRAPH.number_of_nodes())
        for depth, communities in enumerate(levels, 1):
            if self.cnf.verbose_log:
                print(tuple(sorted(c) for c in communities))
            for members in map(frozenset, communities):
                allCommunities.add(members)
                communityLevel[members] = depth

        return sorted(communityLevel.items(),
                      key=lambda entry: entry[1],
                      reverse=reverseOrd)
Esempio n. 9
0
    def get_expanded_query(self, q, args):
        """Expand query ``q`` with top words from groups of similar documents.

        The documents returned by ``get_topn_relevant_docids`` for the query
        id ``args[0]`` are linked whenever ``getsim`` of their tf-idf data
        exceeds 0.5. The graph is split with one Girvan-Newman step, and for
        every community with more than one document the ``self.topw`` best
        words (as chosen by ``get_top_k``) are collected.

        Returns the lower-cased query terms followed by the collected words
        that are not already present, joined by single spaces.
        """
        qid = args[0]
        docids = self.get_topn_relevant_docids(qid)
        tfidfs = [self.get_tfidf(docid) for docid in docids]

        # First position of every docid, so lookups below do not rescan the
        # list with docids.index().
        first_index = {}
        for idx, docid in enumerate(docids):
            first_index.setdefault(docid, idx)

        G = nx.Graph()
        for i, docid in enumerate(docids):
            G.add_node(docid)
            # Compare with every later document, the last one included.
            for j in range(i + 1, len(docids)):
                sim = self.getsim(tfidfs[i], tfidfs[j])
                if sim > 0.5:
                    G.add_weighted_edges_from([(docid, docids[j], sim)])

        comp = community.girvan_newman(G)
        selected_words = []
        for members in next(comp):
            if len(members) > 1:
                pairlist = [
                    self.get_top_word(tfidf=tfidfs[first_index[p]])
                    for p in sorted(members)
                ]
                top_k = self.get_top_k(pairlist, self.topw)
                selected_words.extend(word for word, _ in top_k)

        query_splited = q.lower().split()
        for word in selected_words:
            if word.lower() not in query_splited:
                query_splited.append(word)

        return ' '.join(query_splited)
Esempio n. 10
0
def apply_gn(g, subsize=1000):
    """Run three Girvan-Newman steps on ``g`` and report each partition.

    ``g`` is converted to an undirected graph first. For every step the
    largest community and the size statistics are passed to
    ``extract_info``; all three partitions are then passed to
    ``evaluate_partition``. ``subsize`` is not used.

    Returns a dict mapping the 1-based step number to that step's list of
    communities (each community a tuple of nodes).
    """
    print('COMPUTING GIRVAN-NEWMAN SCORE')
    ntimes = 3
    g = g.to_undirected()
    gn_hierarchy = community.girvan_newman(g)
    iterations = {}

    for step in range(1, ntimes + 1):
        coms_gn = [tuple(members) for members in next(gn_hierarchy)]
        sizes = [len(c) for c in coms_gn]
        max_len = max(sizes)
        min_len = min(sizes)
        # First community, in partition order, that has the maximal size.
        max_community = next(c for c in coms_gn if len(c) == max_len)

        print('ON ITERATION ' + str(step) +
              ' GREATEST COMMUNITY COMPOSED BY ' + str(max_len) + ' NODES')

        iterations[step] = coms_gn

        extract_info({
            'community': max_community,
            'fname': './results_ita/girvan_newman/it_' + str(step) + '_',
            'ncommunities': len(coms_gn),
            'maxcomlen': max_len,
            'mincomlen': min_len
        })

    evaluate_partition({
        'alg': 'girvan-newman',
        'network': g,
        'partition': iterations
    })

    return iterations
Esempio n. 11
0
def community_algorithm(graph_edges, graph_nodes, threshold, max_depth, verbose):
    """ Build a directed graph from thresholded edges and return its first
        Girvan-Newman partition.

        graph_nodes is a collection of nodes (documented as a set).
        graph_edges is a dictionary: key is an edge (indexable pair of
        nodes), value is a list of deltas.
        An edge is added, carrying a ``count`` attribute, when at least one
        of its deltas is below ``threshold``.
        max_depth is not used by this function.
        Returns the first partition yielded by the generator, or None when
        the generator yields nothing. Diagnostics are printed when
        ``verbose`` is greater than 3.
    """
    G = nx.DiGraph()
    G.add_nodes_from(list(graph_nodes))
    edges_count = {}
    for edge, deltas in graph_edges.items():
        # Number of deltas strictly below the threshold.
        count = sum(delta < float(threshold) for delta in deltas)
        edges_count[edge] = count
        if count > 0:
            G.add_edge(edge[0], edge[1], count=count)

    communities_generator = community.girvan_newman(G)
    try:
        com = next(communities_generator)
    except StopIteration:
        if verbose > 3:
            print("No Community found: number nodes {}.".format(len(graph_nodes)))
        return None

    if verbose > 3:
        print("Number communities:", len(com))
        print("Communities:", com)
    return com
def girvan_newman(G, k, weight='weight'):
    '''Community detection using Girvan-Newman algorithm.

    Parameters
    ----------
    G : networkx.graph

    k : number of communities (an integer, at least 1)

    weight : edge attribute if G is weighted or None if G is unweighted.
        It only decides whether the ``most_valuable_edge`` selector defined
        elsewhere in this module is passed on (presumably a weight-aware
        selector -- confirm against its definition).

    Returns
    -------
    list_communities : list
        The first partition of the hierarchy with at least k communities,
        each community a set of vertices. For ``k == 1`` this is the whole
        vertex set as a single community. An empty list is returned when k
        is larger than the number of nodes.

    Raises
    ------
    ValueError
        If k is smaller than 1.

    Notes
    -----
    This function only deals with undirected graph.
    '''
    if k < 1:
        raise ValueError("k must be a positive integer")

    # k must be not larger than number of nodes, or return an empty list
    if k > len(G.nodes()):
        return []

    # A single community needs no edge removal at all.
    if k == 1:
        return [set(G.nodes())]

    # determine most_valuable_edge according to weighted or not
    mvg = None if weight is None else most_valuable_edge
    communities = community.girvan_newman(G.to_undirected(), most_valuable_edge=mvg)

    # Walk the hierarchy until a level offers at least k communities. On a
    # connected graph this is the (k-1)th level; on a disconnected graph the
    # level index and the community count do not line up.
    list_communities = []
    for com in communities:
        list_communities = list(com)
        if len(list_communities) >= k:
            break
    return list_communities
Esempio n. 13
0
def girvanNewmanWrapper(gwr, minNumCommunities):
    """Return the deepest leading Girvan-Newman level within the size limit.

    Levels are consumed for as long as their number of communities does not
    exceed ``minNumCommunities``; the last such level is returned. ``None``
    is returned when the very first level already exceeds the limit.
    """
    commGen = community.girvan_newman(gwr)
    commLevels = itertools.takewhile(lambda c: len(c) <= minNumCommunities,
                                     commGen)
    lastComm = None
    for comm in commLevels:
        # Keep the partition itself, not the ``community`` module.
        lastComm = comm
    return lastComm
Esempio n. 14
0
    def _girvan_newman(self) -> list:
        """Label nodes by community using the (k-1)-th Girvan-Newman level.

        The graph is read from ``global_variable.graph_path`` and up to
        ``self._k - 1`` levels are consumed (each one is printed); the last
        one consumed defines the communities.

        Returns a list with one entry per element of the selected
        ``food_item`` feature. Position ``int(node)`` holds the index of the
        community containing ``node``; positions never assigned stay 0.
        """
        levels_to_take = self._k - 1
        G = nx.read_edgelist(global_variable.graph_path)

        result = ()
        for communities in itertools.islice(girvan_newman(G), levels_to_take):
            result = tuple(sorted(c) for c in communities)
            print(result)

        food_item = self.get_selected_feature(global_variable.food_item)
        label = [0] * len(food_item)

        for community_id, members in enumerate(result):
            for node in members:
                label[int(node)] = community_id

        return label
Esempio n. 15
0
    def __graph_construction(self, X):
        """Cluster labels via Girvan-Newman on an empirically built graph.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_labels)
            Matrix `X`.

        Returns
        -------
        community labels : array holding, for every graph node in ascending
            node order, the index of the community that contains it
        """
        # Co-occurrence matrix, normalised, then reduced to its upper triangle.
        affinity = X.T.dot(X)
        affinity = normalize_laplacian(A=affinity, sigma=self.sigma, return_adj=True, norm_adj=True)
        G = nx.from_scipy_sparse_matrix(A=triu(affinity))

        levels = community.girvan_newman(G)
        # Keep the last leading level with at most ``num_communities`` groups.
        # NOTE(review): if even the first level is larger, ``communities`` is
        # never bound and the code below raises -- confirm callers avoid this.
        for communities in itertools.takewhile(lambda c: len(c) <= self.num_communities, levels):
            pass

        pairs = []
        for idx, members in enumerate(communities):
            for c in members:
                pairs.append((idx, int(c)))
        # Order by node id so the result lines up with the node order.
        pairs.sort(key=lambda pair: pair[1])
        return np.array([idx for idx, _ in pairs])
Esempio n. 16
0
def add_girvan_newman(graph, most_valuable_edge=None):
    """Pass the first Girvan-Newman partition of ``graph`` to
    ``_nx_community_data_to_graph`` and return ``graph``.

    ``most_valuable_edge`` is forwarded to ``nx_community.girvan_newman``.
    """
    levels = nx_community.girvan_newman(graph, most_valuable_edge)
    # The generator yields one partition per level; only the first is used.
    _nx_community_data_to_graph(graph, next(levels))
    return graph
Esempio n. 17
0
        def community_generator(graph):
            """Return the second Girvan-Newman partition level of ``graph``."""
            levels = community.girvan_newman(graph)
            next(levels)  # the first level is skipped
            return next(levels)
Esempio n. 18
0
    def split_graph(self, graph_to_split=None, parts=1):
        """Split a graph into per-community graphs plus per-level node graphs.

        Parameters
        ----------
        graph_to_split : graph, optional
            Graph to partition; defaults to ``self.G``.
        parts : int
            Number of Girvan-Newman levels for which an extra node-only
            graph is appended.

        Returns
        -------
        list
            First, one graph per community of the second Girvan-Newman level,
            holding the community's nodes and the edges of ``self.G`` that
            stay inside it (with their ``weight``). Then up to ``parts``
            graphs, one per level, each holding every node of that level
            tagged with a ``type`` of ``'Model'`` or ``'View'``.
        """
        if graph_to_split is None:
            graph_to_split = self.G

        multi_graph = []

        comp = girvan_newman(graph_to_split)

        # Second level of the hierarchy: skip the first split, keep the next.
        communities_generator = community.girvan_newman(graph_to_split)
        next(communities_generator)
        next_level_communities = next(communities_generator)

        for lvl_comunnity in sorted(map(sorted, next_level_communities)):
            community_graph = nx.Graph()
            community_graph.add_nodes_from(lvl_comunnity)
            for node_in_community in lvl_comunnity:
                try:
                    # NOTE(review): edges are read from self.G even when a
                    # different graph_to_split was passed -- confirm intent.
                    relations = list(self.G.edges(node_in_community))
                    for source, target in relations:
                        if community_graph.has_node(
                                source) and community_graph.has_node(target):
                            relation_weight = self.G[source][target].get(
                                'weight', 0)
                            # Connect the two endpoints; passing the edge
                            # tuple itself would add it as a bogus node.
                            community_graph.add_edge(source,
                                                     target,
                                                     weight=relation_weight)
                        else:
                            print("Missing: {}".format(
                                target if community_graph.
                                has_node(source) else source))
                except Exception as e:
                    # Best effort: report the problem and continue.
                    print(traceback.format_exc())
                    print("error: {}".format(str(e)))
            multi_graph.append(community_graph)

        for communities in itertools.islice(comp, parts):
            community_graph = nx.Graph()
            for names in tuple(sorted(c) for c in communities):
                for name in names:
                    node_type = 'Model' if name in self.graph_type(
                        'Model', graph_to_split) else 'View'
                    community_graph.add_node(name, type=node_type)
            multi_graph.append(community_graph)
        return multi_graph
Esempio n. 19
0
 def test_directed(self):
     """A directed four-node path graph splits into three successive levels."""
     levels = list(girvan_newman(nx.DiGraph(nx.path_graph(4))))
     assert_equal(len(levels), 3)
     first, second, third = levels
     validate_communities(first, [{0, 1}, {2, 3}])
     # The middle level has two symmetric, equally valid outcomes.
     validate_possible_communities(second,
                                   [{0}, {1}, {2, 3}],
                                   [{0, 1}, {2}, {3}])
     validate_communities(third, [{0}, {1}, {2}, {3}])
def community_detection(edge_list=path+'connected-component-analysis/network-profiling-data/cid6_analysis/cid6-edge-list'):
    """Print the number of communities in the second Girvan-Newman level.

    ``edge_list`` is the path of a tab-delimited edge list; its default is
    built from the module-level ``path`` when the function is defined.
    """
    # Original author's remark: too slow...
    from networkx.algorithms import community
    graph = nx.read_edgelist(edge_list, delimiter='\t')
    levels = community.girvan_newman(graph)
    next(levels)  # the first level is not needed
    second_level = next(levels)
    ordered = sorted(sorted(members) for members in second_level)
    print(len(ordered))
Esempio n. 21
0
 def test_directed(self):
     """A directed four-node path graph splits into three successive levels."""
     levels = list(girvan_newman(nx.DiGraph(nx.path_graph(4))))
     assert_equal(len(levels), 3)
     first, second, third = levels
     validate_communities(first, [{0, 1}, {2, 3}])
     # The middle level has two symmetric, equally valid outcomes.
     validate_possible_communities(second,
                                   [{0}, {1}, {2, 3}],
                                   [{0, 1}, {2}, {3}])
     validate_communities(third, [{0}, {1}, {2}, {3}])
Esempio n. 22
0
def community_gn(G, weight_key='weight', **kwargs):
    """Return the first Girvan-Newman partition of ``G``.

    Edges are removed in order of betweenness computed with the edge
    attribute named by ``weight_key``. Extra keyword arguments are accepted
    and ignored.
    """
    def highest_betweenness_edge(graph):
        scores = betweenness(graph, weight=weight_key)
        return max(scores, key=lambda edge: scores.get(edge))

    levels = community.girvan_newman(
        G, most_valuable_edge=highest_betweenness_edge)
    return next(levels)
Esempio n. 23
0
def get_communities(graph, modularity=False, fluid=False):
    """Detect communities in ``graph`` with the selected algorithm.

    modularity: use greedy modularity maximisation (takes precedence over
        ``fluid``).
    fluid: use asynchronous fluid communities with one community per ten
        nodes, and at least one.
    Otherwise Girvan-Newman is used.

    Returns whatever the chosen networkx routine returns. Note that the
    Girvan-Newman branch hands back the lazy generator of partition levels,
    not a single partition.
    """
    if modularity:
        return community.greedy_modularity_communities(graph)
    if fluid:
        # asyn_fluidc needs a positive integer k, so use floor division and
        # clamp to 1 for graphs with fewer than ten nodes.
        k = max(1, graph.number_of_nodes() // 10)
        return community.asyn_fluidc(graph, k)
    return community.girvan_newman(graph)
Esempio n. 24
0
def create_clusters_from_girvannewman(G):
    """Wrap the first Girvan-Newman partition of ``G`` in a ``model``.

    Each community becomes a ``cluster`` tagged "girvan_newman" that holds
    the sorted node list and the entry of the module-level ``colors`` at the
    community's position.
    """
    first_level = next(community.girvan_newman(G))
    partitions = [sorted(members) for members in first_level]
    clusters = [
        cluster("girvan_newman", partition, colors[position])
        for position, partition in enumerate(partitions)
    ]
    return model("girvannewman", clusters)
Esempio n. 25
0
def create_and_assign_communities(text_network):
    """Map every node of ``text_network`` to the index of its community.

    The second Girvan-Newman level is used; a node's value is the position
    of its community within that level.
    """
    logging.info("Assigning communities")
    communities_generator = community.girvan_newman(text_network)
    next(communities_generator)  # the first level is skipped
    next_level_communities = next(communities_generator)
    # enumerate supplies the position directly, avoiding one tuple.index()
    # scan per node.
    return {
        item: index
        for index, community_list in enumerate(next_level_communities)
        for item in community_list
    }
Esempio n. 26
0
    def top10_communities(
            self) -> Tuple[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]:
        """Return (and cache) the ten largest communities of ``self.graph``.

        The Girvan-Newman hierarchy is advanced until a level has at least
        ten communities, and the ten largest by size are kept. If the
        hierarchy ends before that, the deepest level is used, so fewer than
        ten communities may be returned.

        NOTE: the cached value is the list produced by ``nlargest``, despite
        the tuple annotation.
        """
        if self._top10_communities is None:
            communities = ()
            # Iterating (rather than calling next() in a while loop) ends
            # cleanly when the graph cannot be split into ten communities.
            for communities in girvan_newman(self.graph):
                if len(communities) >= 10:
                    break
            self._top10_communities = nlargest(10, communities, key=len)

        return self._top10_communities
Esempio n. 27
0
def compute_girvan_newman_community_metrics(G):
    """Print and return the modularity of the second Girvan-Newman level."""
    # Girvan-Newman removes high-betweenness links and reads the clusters
    # off the remaining components.
    levels = community.girvan_newman(G)
    next(levels)  # the first level is not scored
    second_level = next(levels)

    score = 0
    score += community.modularity(G, second_level)
    message = 'Graph modularity based on Girvan Newmann clustering is {}'
    print(message.format(score))
    return score
Esempio n. 28
0
 def girvan_newman(self, k):
     """Write the leading Girvan-Newman levels with at most ``k`` communities.

     The graph comes from ``build_rank_graph()``. Levels are taken while
     their community count does not exceed ``k``; each one is written, as a
     tuple of sorted node lists, on its own line of
     ``../data/girvan_newman.txt`` (resolved against the working directory).
     The community count of each level is echoed to standard output.
     """
     graph = build_rank_graph()
     comp = alg.girvan_newman(graph)
     limited = itertools.takewhile(lambda c: len(c) <= k, comp)
     out_path = os.path.abspath('..') + "/data/girvan_newman.txt"
     # The context manager closes (and flushes) the file even on error.
     with open(out_path, 'w') as f:
         for communities in limited:
             print("community count:", len(communities), ":")
             print(tuple(sorted(c) for c in communities), file=f)
             print(" ")
             print(" ")
Esempio n. 29
0
 def test_selfloops(self):
     """A four-node path with self-loops on 0 and 2 splits into three levels."""
     G = nx.path_graph(4)
     G.add_edges_from([(0, 0), (2, 2)])
     levels = list(girvan_newman(G))
     assert_equal(len(levels), 3)
     first, second, third = levels
     validate_communities(first, [{0, 1}, {2, 3}])
     # The middle level has two symmetric, equally valid outcomes.
     validate_possible_communities(second,
                                   [{0}, {1}, {2, 3}],
                                   [{0, 1}, {2}, {3}])
     validate_communities(third, [{0}, {1}, {2}, {3}])
Esempio n. 30
0
def get_communities_grivan_newman(G):
    """Return the first Girvan-Newman level whose largest community is small.

    A level qualifies when its largest community has fewer than 10% of the
    nodes of ``G`` or fewer than 15 nodes. The size of the largest community
    of every inspected level is printed.

    Returns the qualifying level as a list of communities, or ``None`` when
    no level qualifies.
    """
    # G.node was removed in networkx 2.4; number_of_nodes() works on every
    # version. The threshold does not change between levels.
    size_limit = 0.1 * G.number_of_nodes()
    for comm in community.girvan_newman(G):
        max_clique = max(len(x) for x in comm)
        print(max_clique)
        if max_clique < size_limit or max_clique < 15:
            return list(comm)
    return None
Esempio n. 31
0
 def test_most_valuable_edge(self):
     """A custom selector (heaviest edge first) decides the removal order."""
     G = nx.Graph()
     G.add_weighted_edges_from([(0, 1, 3), (1, 2, 2), (2, 3, 1)])

     def heaviest(graph):
         # The most valuable edge is the one with the highest weight.
         u, v, _ = max(graph.edges(data='weight'), key=itemgetter(2))
         return (u, v)

     levels = list(girvan_newman(G, heaviest))
     assert_equal(len(levels), 3)
     first, second, third = levels
     validate_communities(first, [{0}, {1, 2, 3}])
     validate_communities(second, [{0}, {1}, {2, 3}])
     validate_communities(third, [{0}, {1}, {2}, {3}])
Esempio n. 32
0
 def test_selfloops(self):
     """A four-node path with self-loops on 0 and 2 splits into three levels."""
     G = nx.path_graph(4)
     G.add_edges_from([(0, 0), (2, 2)])
     levels = list(girvan_newman(G))
     assert_equal(len(levels), 3)
     first, second, third = levels
     validate_communities(first, [{0, 1}, {2, 3}])
     # The middle level has two symmetric, equally valid outcomes.
     validate_possible_communities(second,
                                   [{0}, {1}, {2, 3}],
                                   [{0, 1}, {2}, {3}])
     validate_communities(third, [{0}, {1}, {2}, {3}])
Esempio n. 33
0
def main(argv):
    """Print the highest modularity over all Girvan-Newman levels.

    ``argv[1]`` is handed to ``read_network.read_static_network`` to load
    the network.
    """
    g = read_network.read_static_network(argv[1])

    # Materialise the whole hierarchy first, then score every level.
    gn_output = list(girvan_newman(g))
    solutions = [modularity(g, solution) for solution in gn_output]

    print('modularité maximale détectée par Girvan et Newman: ',
          max(solutions))
Esempio n. 34
0
 def test_undirected(self):
     """An undirected four-node path splits into three successive levels."""
     # Start with the graph .-.-.-.
     levels = list(girvan_newman(nx.path_graph(4)))
     assert_equal(len(levels), 3)
     first, second, third = levels
     # One removal leaves the graph .-. .-.
     validate_communities(first, [{0, 1}, {2, 3}])
     # The next leaves .-. . . in one of two symmetric versions.
     validate_possible_communities(second,
                                   [{0}, {1}, {2, 3}],
                                   [{0, 1}, {2}, {3}])
     # The last removal always leaves the empty graph.
     validate_communities(third, [{0}, {1}, {2}, {3}])
# In[17]:


from networkx.algorithms import approximation


# In[18]:


from networkx.algorithms import community


# In[19]:


# Lazy generator of Girvan-Newman partition levels for G.
# NOTE(review): G is not defined in the cells visible here -- presumably it
# is built in an earlier notebook cell; confirm.
communities_generator = community.girvan_newman(G)


# In[20]:


# First level produced by the generator; the bare name on the next line is
# the notebook idiom for displaying the value.
top_level_communities = next(communities_generator)
top_level_communities


# In[72]:


# Second level produced by the same generator, displayed the same way.
next_level_communities = next(communities_generator)
next_level_communities
Esempio n. 36
0
 def test_no_edges(self):
     """A graph without edges yields one level made of singletons."""
     levels = list(girvan_newman(nx.empty_graph(3)))
     assert_equal(len(levels), 1)
     (only_level,) = levels
     validate_communities(only_level, [{0}, {1}, {2}])