예제 #1
0
def communityMining(G, minCommSize=10):
  """
  Find communities in the graph 'G' with more than 'minCommSize' nodes.

  The graph is partitioned twice. Nodes whose community in the first
  (level 0) partition has fewer than 'minCommSize' members are removed from
  'G' in place. The pruned graph is partitioned again and every community
  with more than 'minCommSize' members is returned.

  Args:
    G: graph whose nodes all carry a 'bipartite' attribute; only nodes whose
       attribute equals 1 (FQDNs) or 0 (hosts) end up in the returned
       communities. 'G' is modified in place by the pruning step.
    minCommSize: size threshold used both for pruning and for reporting.

  Returns:
    A list with one 'G.subgraph(...)' per retained community.
  """
  dendrogram = community.generate_dendrogram(G)
  firstPartition = community.partition_at_level(dendrogram, 0)

  sys.stderr.write("Prune sparse clusters. ")
  # Remove early small communities. '.items()' works on Python 2 and 3.
  firstSizes = Counter(firstPartition.values())
  sparseComm = set(k for k, v in firstSizes.items() if v < minCommSize)
  nodes = [node for node in G.nodes() if firstPartition[node] in sparseComm]
  G.remove_nodes_from(nodes)

  sys.stderr.write("Find communities. ")
  # Partition the pruned graph again, one level below the top of the
  # dendrogram; clamped so a single-level dendrogram never yields a
  # negative level.
  dendrogram = community.generate_dendrogram(G)
  partition = community.partition_at_level(dendrogram,
                                           max(0, len(dendrogram) - 2))

  # Group the FQDN / host nodes by community in a single pass instead of
  # rescanning every node for every community.
  members = {}
  for n, d in G.nodes(data=True):
    if d['bipartite'] in (0, 1):
      members.setdefault(partition[n], []).append(n)

  communities = []
  bigComm = [k for k, v in Counter(partition.values()).items()
             if v > minCommSize]
  for com in bigComm:
    comm = G.subgraph(members.get(com, []))
    if comm.order() < minCommSize:
      # Reached only when nodes without a 0/1 'bipartite' value were
      # counted in the community size but left out of the subgraph.
      sys.stderr.write("Remove small community (This shouldn't happen here?)\n")
      continue

    communities.append(comm)

  return communities
예제 #2
0
def _graph_community(G):  # unused function
    '''The 'graph_community' function is used to analyse a corpus at
       two levels of the dendrogram of the corpus coupling graph G:
       every top-level community with more than SIZECUT nodes is split once
       more by running Louvain on its induced subgraph.
       Author: Sebastian Grauwin (http://sebastian-grauwin.com/bibliomaps/)

       Args:
           G (networkx object): corpus coupling graph.

       Returns:
           louvain_partition (dict): keyed by level. Keys 0..len(dendrogram)-1
               hold the partition of G at that dendrogram level (top-level
               labels shifted to start from 1); key len(dendrogram) holds the
               refined partition, where a split community c gets the labels
               (c + 1) * 1000 + sub_id and an unsplit community c gets c + 1.

    '''

    # 3rd party import
    import community as community_louvain

    # TO DO: move SIZECUT in COUPL_GLOBAL_VALUES if _graph_community is used
    SIZECUT = 10  # Communities larger than this are split once more

    dendrogram, part, _ = _runpythonlouvain(G)
    part2 = part.copy()

    # Group nodes by top-level community in one pass.
    nodes_by_community = {}
    for node, community_id in part.items():
        nodes_by_community.setdefault(community_id, []).append(node)

    for community_id, list_nodes in nodes_by_community.items():
        if len(list_nodes) > SIZECUT:  # split clusters of size > SIZECUT
            H = G.subgraph(list_nodes).copy()
            # One Louvain run per sub-graph is enough; the original ran it
            # twice and discarded the first result.
            _, partfoo, _ = _runpythonlouvain(H)
            # add prefix code so sub-cluster labels stay tied to their parent
            for node, sub_id in partfoo.items():
                part2[node] = (community_id + 1) * 1000 + sub_id
        else:  # for communities of at most SIZECUT nodes, shift the com label as well
            for node in list_nodes:
                part2[node] += 1

    # ... save partitions
    louvain_partition = dict()
    for lev in range(len(dendrogram)):
        louvain_partition[lev] = community_louvain.partition_at_level(
            dendrogram, lev)
    # .. set community labels starting from 1 instead of 0 for top level
    for k in louvain_partition[len(dendrogram) - 1].keys():
        louvain_partition[len(dendrogram) - 1][k] += 1
    louvain_partition[len(dendrogram)] = part2

    return louvain_partition
예제 #3
0
def calc_louvain(adj_matrix, level=0, return_c_graph=False):
    """Run Louvain on an adjacency matrix and list the communities at a level.

    Args:
        adj_matrix: adjacency matrix passed to ``nx.from_numpy_matrix``.
        level: depth counted from the top of the dendrogram; 0 selects the
            last (top) dendrogram level, 1 the level below it, and so on.
        return_c_graph: when true, also return the adjacency matrix of the
            community-level induced graph.

    Returns:
        (community_list, c_level_graph): ``community_list`` holds one list of
        nodes per community, ordered by community id; ``c_level_graph`` is
        the induced graph's adjacency matrix, or None when not requested.

    Raises:
        Exception: if ``level`` is deeper than the dendrogram.
    """
    # NOTE(review): from_numpy_matrix is not available in newer networkx
    # releases (from_numpy_array replaces it) - confirm the pinned version.
    nx_G = nx.from_numpy_matrix(adj_matrix)
    dendro = louvain.generate_dendrogram(
        nx_G, randomize=False)  # Maybe set randomize True

    dendro_level = len(dendro) - level - 1
    if dendro_level < 0:
        raise Exception("The given Level is too deep. The maximum is: " +
                        str(len(dendro) - 1))

    communities = louvain.partition_at_level(dendro, dendro_level)

    # Group nodes by community id. The original derived the number of
    # communities from max(communities, key=...), which yields a *node*
    # rather than the largest community id, so communities could be lost.
    members = {}
    for node, community_id in communities.items():
        members.setdefault(community_id, []).append(node)
    community_list = [members[community_id] for community_id in sorted(members)]

    community_level_G = louvain.induced_graph(communities, nx_G)

    if return_c_graph:
        c_level_graph = nx.adjacency_matrix(community_level_G)
    else:
        c_level_graph = None

    return community_list, c_level_graph
예제 #4
0
 def prepare_communities(self):
     """Build the Louvain dendrogram of ``self.g`` and store it on the instance.

     Uses ``community.generate_dendrogram`` when the module provides it and
     falls back to the ``generate_dendogram`` spelling otherwise.
     """
     build = (community.generate_dendrogram
              if hasattr(community, 'generate_dendrogram')
              else community.generate_dendogram)
     self.dendrogram = build(self.g)
     # Placeholder walk over the levels; nothing is done with them yet.
     for level in range(len(self.dendrogram)):
         continue
예제 #5
0
def external_ec_coarsening(graph, sfdp_path, coarsening_scheme = 2, c_type = 'original'):
    """Coarsen ``graph`` with Louvain or with the original coarsening scheme.

    Args:
        graph: input graph, passed to ``magicgraph.to_adjacency_matrix`` and
            ``DoubleWeightedDiGraph``.
        sfdp_path: forwarded to ``original_coarsening`` (unused for Louvain).
        coarsening_scheme: forwarded to ``original_coarsening``.
        c_type: 'louvain' or 'original'.

    Returns:
        For 'louvain': ``(coarse_graphs, merges)`` where ``coarse_graphs``
        starts with the input graph followed by one induced graph per
        dendrogram level and ``merges`` holds the partition of each level.
        For 'original': whatever ``original_coarsening`` returns.
        Any other ``c_type`` falls through and returns None.

    Side effects:
        The Louvain branch writes ``induced<level>.edgelist`` files into the
        current working directory.
    """
    if c_type == 'louvain':
        print("Coarsening with Louvain")
        matrix = magicgraph.to_adjacency_matrix(graph)
        nx_graph = nx.from_scipy_sparse_matrix(matrix)
        dendro = community.generate_dendrogram(nx_graph)

        coarse_graphs = [DoubleWeightedDiGraph(graph)]
        merges = []
        for level_idx in range(len(dendro)):
            level = community.partition_at_level(dendro, level_idx)
            induced = community.induced_graph(level, nx_graph)
            filename = 'induced'+str(level_idx)+'.edgelist'
            # Write the weighted graph to file; 'with' guarantees the handle
            # is closed (and flushed) before the file is loaded back.
            with open(filename, 'w') as f:
                for u, v, a in induced.edges.data('weight', default = 1):
                    f.write(' '.join([str(u), str(v), str(a)]) + '\n')
            m_graph = magicgraph.load_weighted_edgelist(filename, undirected = True)
            coarse_graphs.append(DoubleWeightedDiGraph(m_graph))
            merges.append(level)
            print('Level: ', level_idx, 'N nodes: ', m_graph.number_of_nodes())

        return coarse_graphs, merges
    elif c_type == 'original':
        return original_coarsening(graph, sfdp_path, coarsening_scheme)
예제 #6
0
def apply_community_louvain(G):
    """Print and return the top-level Louvain communities of ``G``.

    Communities that contain the start or end node reported by
    ``get_start_and_end_nodes`` are left out.

    Args:
        G: graph to partition.

    Returns:
        A list of communities, each a list of nodes, numbered in the printed
        output from 1 in the order they are kept.
    """
    start_node_id, end_node_id = get_start_and_end_nodes(G)

    # A single Louvain run: the original also called best_partition() and
    # never used the result.
    dendo = community_louvain.generate_dendrogram(G)
    highest_partition = community_louvain.partition_at_level(
        dendo, (len(dendo) - 1))

    # Group nodes by community in one pass over the partition.
    members = {}
    for node, community_number in highest_partition.items():
        members.setdefault(community_number, []).append(node)

    print("Communities;")

    list_of_communities = []
    for community_number in set(highest_partition.values()):
        community_items = members[community_number]
        if start_node_id in community_items or end_node_id in community_items:
            continue
        list_of_communities.append(community_items)
        print(f"Community number {len(list_of_communities)}: {community_items}")

    return list_of_communities
예제 #7
0
    def louvianClustering(self, similarity_measure_list):
        """Cluster items with Louvain on a weighted similarity graph.

        Args:
            similarity_measure_list: iterable of ``(f1, f2, val)`` triples;
                an edge ``f1 - f2`` with weight ``float(val)`` is added when
                the weight exceeds the threshold (currently 0). Both items
                become nodes even when the edge is dropped.

        Returns:
            A list of clusters, each a list of nodes. The same list is also
            stored in ``self.cluster_set``.
        """
        edge_list = []
        node_list = []
        thresh = 0  #self.getThreshold(similarity_measure_list)
        for f1, f2, val in similarity_measure_list:
            weight = float(val)
            if weight > thresh:
                edge_list.append((f1, f2, weight))
            node_list.append(f1)
            node_list.append(f2)
        node_list = list(set(node_list))
        G = nx.Graph()
        G.add_nodes_from(node_list)
        G.add_weighted_edges_from(edge_list)

        partition = community.best_partition(G)

        # Group nodes by cluster id in one pass; '.items()' works on both
        # Python 2 and 3 (the original 'iteritems' is Python-2-only).
        members = {}
        for node, cluster in partition.items():
            members.setdefault(cluster, []).append(node)
        cluster_set_elements = [
            members[cluster_id] for cluster_id in set(partition.values())
        ]
        self.cluster_set = cluster_set_elements
        return cluster_set_elements
예제 #8
0
	def generateDendogram(self):
		"""Compute the dendrogram of ``self.g`` and a per-level ordering.

		``self.dendogram`` receives the result of ``cm.generate_dendrogram``;
		``self.Order`` receives, for each level, an OrderedDict of that level's
		items sorted by their value.
		"""
		self.Order = []

		self.dendogram = cm.generate_dendrogram(self.g)

		for level_partition in self.dendogram:
			ranked_items = sorted(level_partition.items(), key=lambda pair: pair[1])
			self.Order.append(OrderedDict(ranked_items))
예제 #9
0
def louvain_community_detection(networkx_graph):
    """
    Run Louvain community detection and return the level-0 partition.

    :param networkx_graph: graph to partition; edge weights are read from
        the 'weight' attribute
    :return: result of ``cm.partition_at_level`` for dendrogram level 0
    """
    dendrogram = cm.generate_dendrogram(networkx_graph,
                                        randomize=True,
                                        weight='weight')
    first_level = 0
    return cm.partition_at_level(dendrogram, first_level)
 def test_modularity_increase(self):
     """
     Check that modularity never decreases from one dendrogram level to the next
     """
     graph = nx.erdos_renyi_graph(1000, 0.01)
     dendo = co.generate_dendrogram(graph)
     mods = []
     for level in range(len(dendo)):
         part = co.partition_at_level(dendo, level)
         mods.append(co.modularity(part, graph))
     # A sequence equal to its sorted copy is non-decreasing.
     self.assertListEqual(mods, sorted(mods))
예제 #11
0
def _runpythonlouvain(G):  # unused function
    '''The "_runpythonlouvain" function  is used to analyse a corpus
       at level "len(foo_dendrogram) - 1)" of the corpus coupling graph G dendrogram,
       (see https://buildmedia.readthedocs.org/media/pdf/python-louvain/latest/python-louvain.pdf).
       The Louvain algorithm is run NRUNS times and the run with the highest
       modularity is kept.
       Author: Sebastian Grauwin (http://sebastian-grauwin.com/bibliomaps/)

       Args:
           G (networkx object): corpus coupling graph.

       Returns:
           results (tuple): [dendrogram, partition, modularity,] where
               dendrogram [list of dict]: a list of partitions, ie dictionnaries
                                          where keys of the i+1 dict are the values of the i dict;
               partition (dict): Louvain partition of the corpus coupling graph G
                                 where dict keys are the pub IDs
                                 and the dict values are the community IDs;
               modularity [float]: modularity of the returned partition.

    '''
    # standard library imports
    from collections import namedtuple

    # 3rd party import
    import community as community_louvain

    # TO DO: move NRUNS in COUPL_GLOBAL_VALUES if _runpythonlouvain is used.
    NRUNS = 1  # number of time the louvain algorithm is run for a given network,
    # the best partition being kept.

    named_tup_results = namedtuple('results', [
        'dendrogram',
        'partition',
        'modularity',
    ])

    max_modularity = -1
    for run in range(NRUNS):
        if NRUNS > 1:
            print(f'......run {run + 1}/{NRUNS}')
        foo_dendrogram = community_louvain.generate_dendrogram(G,
                                                               part_init=None)
        partition_foo = community_louvain.partition_at_level(
            foo_dendrogram,
            len(foo_dendrogram) - 1)
        modularity = community_louvain.modularity(partition_foo, G)
        if modularity > max_modularity:
            max_modularity = modularity
            partition = partition_foo.copy()
            dendrogram = foo_dendrogram.copy()

    # Report the modularity of the partition that was kept, not the one of
    # the last run (they differ as soon as NRUNS > 1).
    louvain_part = named_tup_results(
        dendrogram,
        partition,
        max_modularity,
    )
    return louvain_part
예제 #12
0
def dend(file):
    """Build the Louvain dendrogram for the sequences in ``file`` and print it.

    The graph comes from ``make_allDistTuple_fast`` followed by ``kstep``;
    edge weights are read from the 'len' attribute. Each dendrogram level is
    printed on its own line.

    Returns:
        (dendrogram, seqnames)
    """
    seqs, seqnames = getseqs(file)
    allDistTuple, g = make_allDistTuple_fast(seqs)
    g = kstep(allDistTuple, g)
    dendrogram = community.generate_dendrogram(g, weight='len')
    for level_partition in dendrogram:
        print(level_partition)
    return dendrogram, seqnames
예제 #13
0
 def busmap_by_louvain(network, level=-1):
     """Map every bus of ``network`` to a Louvain community.

     Lines become graph edges weighted by ``1 / x``; ``level`` selects the
     dendrogram level, negative values counting back from the last level.

     Returns a pandas Series indexed by ``network.buses.index``.
     """
     inverse_x = 1./network.lines.x
     endpoints = network.lines.loc[:,['bus0', 'bus1']]
     lines = endpoints.assign(weight=inverse_x).set_index(['bus0','bus1'])

     G = nx.Graph()
     G.add_nodes_from(network.buses.index)
     for (u, v), w in lines.itertuples():
         G.add_edge(u, v, weight=w)

     dendrogram = community.generate_dendrogram(G)
     chosen_level = level + len(dendrogram) if level < 0 else level
     partition = community.partition_at_level(dendrogram, level=chosen_level)
     return pd.Series(partition, index=network.buses.index)
예제 #14
0
 def busmap_by_louvain(network, level=-1):
     """Assign each bus of ``network`` to a Louvain community.

     The graph has one node per bus and one edge per line, weighted by the
     inverse of the line's ``x``. A negative ``level`` is counted back from
     the last dendrogram level.

     Returns a pandas Series indexed by ``network.buses.index``.
     """
     weighted = network.lines.loc[:,['bus0', 'bus1']].assign(weight=1./network.lines.x)
     lines = weighted.set_index(['bus0','bus1'])

     G = nx.Graph()
     G.add_nodes_from(network.buses.index)
     for (bus0, bus1), weight in lines.itertuples():
         G.add_edge(bus0, bus1, weight=weight)

     dendrogram = community.generate_dendrogram(G)
     if level < 0:
         level = level + len(dendrogram)
     labels = community.partition_at_level(dendrogram, level=level)
     return pd.Series(labels, index=network.buses.index)
예제 #15
0
 def test_modularity_increase(self):
     """
     Build a dendrogram and verify that modularity is non-decreasing across levels
     """
     graph = nx.erdos_renyi_graph(1000, 0.01)
     dendo = co.generate_dendrogram(graph)
     mods = []
     for level, _ in enumerate(dendo):
         partition = co.partition_at_level(dendo, level)
         mods.append(co.modularity(partition, graph))
     expected = sorted(mods)
     self.assertListEqual(mods, expected)
예제 #16
0
def comunity_detection_function(datafile,
                                G,
                                show=True,
                                db_name='facebook_combined.csv'):
    '''
    Compute the best Louvain partition of G, optionally draw it, and return
    the dendrogram of G.

    requirment:

    networkx
    matplotlib
    pandas
    community: https://bitbucket.org/taynaud/python-louvain

    datafile and db_name are accepted but not used by this function.
    '''

    # first compute the best partition
    partition = community.best_partition(G)
    print("community detection finished!")

    if show:
        # drawing nodes, one colour per community
        print("drawing nodes of communites...")
        pos = nx.spring_layout(G)

        # Colours are the matplotlib cycle names C1, C2, ... in the order
        # the communities are visited.
        for color_index, com in enumerate(set(partition.values()), 1):
            list_nodes = [
                node for node, node_com in partition.items() if node_com == com
            ]
            nx.draw_networkx_nodes(G,
                                   pos,
                                   list_nodes,
                                   node_size=10,
                                   node_color='C' + str(color_index))
        print("drawing community detection finished!")

        # drawing edges
        print("drawing network edges...")
        nx.draw_networkx_edges(G, pos)
        plt.show()

    return community.generate_dendrogram(G)
예제 #17
0
def dendo_community(x):
    """Reorder matrix ``x`` so that rows/columns of one community are adjacent.

    Communities are those of the first dendrogram level of the graph built
    by ``corr_matrix2graph``.
    NOTE(review): assumes the first-level dict iterates in the same order as
    the rows of ``x`` - confirm against corr_matrix2graph.

    Returns:
        (sorted_x, sort_index): the reordered matrix and the permutation used.
    """
    import community

    G = corr_matrix2graph(x)
    first_level = community.generate_dendrogram(G)[0]
    labels = np.array(list(first_level.values()))
    sort_index = np.argsort(labels)

    # The helper's return value is not used; the call is kept as in the
    # original code.
    rearrange_mat(x, sort_index)
    row_sorted = x[sort_index, :]
    sorted_x = row_sorted[:, sort_index]

    return sorted_x, sort_index
예제 #18
0
    def update(self, inputs, adj_mat=None):
        """Rebuild the graph and dendrogram, then refresh labels and centroids.

        The graph is built from ``adj_mat`` with ``nx.from_numpy_matrix`` and
        its Louvain dendrogram is stored in ``self.dendrogram``. Labels are
        then refreshed through ``self.updateLabels(self.level)`` and centroids
        recomputed from ``inputs`` and ``self.labels``.

        Returns nothing; results are stored on the instance.
        """
        self.graph = nx.from_numpy_matrix(adj_mat)
        self.dendrogram = community_louvain.generate_dendrogram(self.graph)
        self.updateLabels(self.level)
        self.centroids = computeCentroids(inputs, self.labels)
def louvain(graph):
    """ Louvain clustering. Returns a dictionary keyed by dendrogram level
        whose values are the clusterings as returned by to_clusters_dict.
        The last dendrogram level is not included.
    """
    community.__MIN = 1e-12
    dendo = community.generate_dendrogram(graph)
    n_reported = len(dendo) - 1
    # partition_at_level gives a node -> community dict for each level
    return {
        level: to_clusters_dict(community.partition_at_level(dendo, level))
        for level in range(n_reported)
    }
예제 #20
0
def louvain(graph):
    """ Louvain clustering: maps each dendrogram level (except the last one)
        to its clustering in the format produced by to_clusters_dict.
    """
    community.__MIN = 1e-12
    dendo = community.generate_dendrogram(graph)
    multilevel = {}
    level = 0
    while level < len(dendo) - 1:
        # node -> community assignment at this level
        node_to_community = community.partition_at_level(dendo, level)
        multilevel[level] = to_clusters_dict(node_to_community)
        level += 1
    return multilevel
예제 #21
0
def modularize(edgeGraph, nodeDf, nameOfModularityColumn=u'Community_Lvl_0'):
	'''
	Label every node of the data frame with its Louvain community.

	The communities are those of the last dendrogram level of edgeGraph
	(edge weights read from 'weight'); they are written to the column
	nameOfModularityColumn by mapping the 'Id' column through the partition.
	Returns the data frame passed through nodeDfCleaner, and the dendrogram.
	'''
	dendrogram = community.generate_dendrogram(edgeGraph, weight='weight')
	topLevel = len(dendrogram) - 1
	bestPartition = community.partition_at_level(dendrogram, topLevel)
	# make sure the target column exists before filling it
	if nameOfModularityColumn not in nodeDf.columns:
		nodeDf[nameOfModularityColumn] = np.nan
	communityByNode = nodeDf[u'Id'].map(bestPartition)
	nodeDf[nameOfModularityColumn] = communityByNode
	cleanedDf = nodeDfCleaner(nodeDf)
	return cleanedDf, dendrogram
예제 #22
0
def infos_per_level(graph, labels, params, verbose=True):
    """Compute community entropy information for each dendrogram level.

    Starting with every node as its own colour, the colouring is pushed
    through each dendrogram level in turn and handed to
    ``compute_communities_entropy``. ``params`` is accepted but not used.

    Returns:
        A list with one ``compute_communities_entropy`` result per level.
    """
    dendrogram = louvain.generate_dendrogram(graph)
    colors = {node: node for node in graph}
    infos = []
    for level_mapping in dendrogram:
        # Re-colour every node with the community of its current colour.
        colors = {node: level_mapping[colors[node]] for node in colors}
        infos.append(
            compute_communities_entropy(graph.nodes,
                                        colors,
                                        labels,
                                        verbose=verbose))
    return infos
예제 #23
0
파일: run.py 프로젝트: paepcke/via_pathways
def run_louvain(experiment_dir):
    """Print the level-0 Louvain communities of the projection graph.

    Reads ``projection.txt`` from ``experiment_dir`` as a directed, weighted
    edge list, converts it to an undirected graph and prints the members of
    every community id from 0 up to and including the largest id, each
    framed by lines of '='.

    Args:
        experiment_dir: directory containing ``projection.txt``.
    """
    g = nx.read_edgelist(os.path.join(experiment_dir, 'projection.txt'),
                         create_using=nx.DiGraph,
                         data=[('weight', float), ('p_prereq', float),
                               ('p_course', float)])
    g = g.to_undirected()
    d = community.generate_dendrogram(g)
    level_0 = community.partition_at_level(d, 0)

    # Group class ids by community in one pass.
    members = {}
    for class_id, partition_num in level_0.items():
        members.setdefault(partition_num, []).append(class_id)

    if not members:
        return  # empty graph: nothing to report

    # "+ 1" so the community with the largest id is reported as well; the
    # original range(max(...)) stopped one short.
    for i in range(max(members) + 1):
        print('=' * 40)
        print(members.get(i, []))
        print('=' * 40)
 def test_nodes_stay_together(self):
     """
     Nodes sharing a community at one level must share one at the next level up
     """
     g = nx.erdos_renyi_graph(500, 0.01)
     dendo = co.generate_dendrogram(g)
     parts = [co.partition_at_level(dendo, lvl) for lvl in range(len(dendo))]
     # Compare every level with the one directly above it.
     for lower, upper in zip(parts, parts[1:]):
         for com in set(lower.values()):
             targets = set(upper[node]
                           for node, comnode in lower.items()
                           if comnode == com)
             self.assertEqual(len(targets), 1)
예제 #25
0
def make_partitions(name='projection_graph.pickle'):
    """
    Compute the Louvain dendrogram of a stored graph and pickle it.

    The dendrogram is written to 'partition_dendogram.pickle' in the current
    working directory using pickle protocol 2.

    :param name: name of graph pickle file, loaded through ``read``
    :return: tuple (graph, dendrogram)
    """
    graph = read(name)

    print("Generating Partition Dendogram")
    dendogram = community.generate_dendrogram(graph)

    with open('partition_dendogram.pickle', 'wb') as out_file:
        pickle.dump(dendogram, out_file, protocol=2)

    return graph, dendogram
예제 #26
0
def study_dendrogram(G, filename):
  """Report and plot the modularity of every level of G's Louvain dendrogram.

  Prints the number of levels and the number of communities per level, plots
  modularity against level, saves the figure under 'drawings/' when a
  filename is given, shows it, and returns the dendrogram.
  """
  dendrogram = co.generate_dendrogram(G)
  n_levels = len(dendrogram)
  print("Dendrogram has {} levels".format(n_levels))

  levels = []
  modularities = []
  for level in range(n_levels):
    part = co.partition_at_level(dendrogram, level)
    n_communities = len(set(part.values()))
    print("Found {} communities at level {}".format(n_communities, level))
    levels.append(level)
    modularities.append(co.modularity(part, G))

  plt.plot(levels, modularities, linestyle='dotted', marker = 'o', markersize=8)
  plt.xlabel("l - Level")
  plt.ylabel("Q - Modularity")
  if filename:
    plt.savefig("drawings/"+filename)
  plt.show()
  return dendrogram
예제 #27
0
def identify_clusters(graph, louvain_level=-1):
    """
    Partition the given NetworkX Graph into clusters with the Louvain method.

    louvain_level selects the dendrogram level: 0 is the most granular
    partition and granularity decreases as the level grows.  Negative values
    count down from the number of levels (-1 is the last level, giving the
    largest clusters) and are floored at 0.  Values beyond the last level
    are silently reset to the last level.
    """
    dendrogram = community.generate_dendrogram(graph)
    n_levels = len(dendrogram)
    level = louvain_level
    if level < 0:
        level = max(0, n_levels + level)
    # Cap at the last available level.
    level = min(level, n_levels - 1)
    return community.partition_at_level(dendrogram, level)
예제 #28
0
def get_community(weight):
    """
    Cluster the graph and discover communities.

    Builds an undirected graph from the 'from_id' / 'to_id' columns of the
    module-level ``graph_data`` frame, using the ``weight`` column as edge
    weight, and labels every node at each Louvain dendrogram level.

    weight: name of the column to use as edge weight

    Returns a DataFrame with an 'id' column and one '<weight>_label_<level>'
    column per dendrogram level.
    """
    FG = nx.Graph()
    FG.add_weighted_edges_from(graph_data[['from_id', 'to_id', weight]].values)

    result = pd.DataFrame({'id': list(FG.nodes)})
    print('node number: %s' % len(result))
    dendrogram = community.generate_dendrogram(FG)

    for level in range(len(dendrogram)):
        the_partition = community.partition_at_level(dendrogram, level)
        # Look each label up by node id instead of relying on the partition
        # dict iterating in the same order as FG.nodes.
        result['%s_label_%s' % (weight, level)] = result['id'].map(the_partition)

    return result
예제 #29
0
def run_louvain(experiment_dir):
    """Write the level-0 Louvain communities of the experiment graph to JSON.

    The graph from ``get_networkx_graph`` is made undirected and partitioned;
    ``louvain.json`` in ``experiment_dir`` receives a list with one list of
    class ids per community id, from 0 up to and including the largest id.

    Args:
        experiment_dir: directory the graph is read from and the JSON file is
            written to.
    """
    g = get_networkx_graph(experiment_dir)
    g = g.to_undirected()

    d = community.generate_dendrogram(g)
    level_0 = community.partition_at_level(d, 0)

    # Group class ids by community in one pass.
    members = {}
    for class_id, partition_num in level_0.items():
        members.setdefault(partition_num, []).append(class_id)

    # "+ 1" so the community with the largest id is written too; the original
    # range(max(...)) dropped it. An empty partition yields an empty list.
    majors = []
    if members:
        majors = [members.get(i, []) for i in range(max(members) + 1)]

    with open(os.path.join(experiment_dir, 'louvain.json'), 'w') as outfile:
        json.dump(majors, outfile, indent=4)
예제 #30
0
def add_cluster_labels_to_nodes(nodes_pdf, edges_pdf, weight_col='lift'):
    """
    Add one Louvain cluster column per dendrogram level to nodes_pdf.

    The graph is built from the 'from' / 'to' columns of edges_pdf, weighted
    by weight_col. For every level a column 'level_<n>_cluster' holding the
    cluster of each node 'id' is added to nodes_pdf in place; nothing is
    returned.
    """
    import networkx as nx
    graph = nx.Graph()
    weighted_edges = []
    for _, row in edges_pdf.iterrows():
        weighted_edges.append((row['from'], row['to'], row[weight_col]))
    graph.add_weighted_edges_from(weighted_edges)

    dendro = community_louvain.generate_dendrogram(graph)
    for level, _ in enumerate(dendro):
        column = f"level_{level}_cluster"
        partition = community_louvain.partition_at_level(dendro, level)
        nodes_pdf[column] = [partition[node_id] for node_id in nodes_pdf['id']]
예제 #31
0
def extract_network_metrics(mdg, ts, team=True):
    """Compute network metrics for the graph extracted by ``extract_dpsg``.

    Args:
        mdg, ts, team: forwarded to ``extract_dpsg``; ``team`` also selects
            the key prefix ('full:' when true, 'user:' otherwise).

    Returns:
        A dict whose keys all start with the prefix: node/edge counts,
        density, per-node betweenness and degree dictionaries (plain and
        weighted by 'count' / 'effort') each with an 'avg_' companion,
        'partitions' and 'louvain_modularity' from the last level of the
        Louvain dendrogram of the undirected graph, and 'avg_distance' (the
        largest average shortest path length over connected components with
        more than one edge). Averages over an empty graph, and metrics that
        cannot be computed, are None.
    """
    def _mean(values):
        # Average of a collection of numbers; None when it is empty.
        values = list(values)
        if not values:
            return None
        return float(sum(values)) / float(len(values))

    met = {}
    dsg = extract_dpsg(mdg, ts, team)
    if team :
        pre = 'full:'
    else:
        pre = 'user:'
    met[pre+'nodes_count'] = dsg.number_of_nodes()
    met[pre+'edges_count'] = dsg.number_of_edges()
    met[pre+'density'] = nx.density(dsg)

    # dict(...) accepts both the plain dicts of old networkx releases and the
    # degree views of newer ones (which have no .values()).
    per_node_metrics = [
        ('betweenness', nx.betweenness_centrality(dsg)),
        ('betweenness_count', nx.betweenness_centrality(dsg, weight='count')),
        ('betweenness_effort', nx.betweenness_centrality(dsg, weight='effort')),
        ('in_degree', dsg.in_degree()),
        ('out_degree', dsg.out_degree()),
        ('degree', dsg.degree()),
        ('degree_count', dsg.degree(weight='count')),
        ('degree_effort', dsg.degree(weight='effort')),
    ]
    for key, per_node in per_node_metrics:
        per_node = dict(per_node)
        met[pre+key] = per_node
        met[pre+'avg_'+key] = _mean(per_node.values())

    usg = dsg.to_undirected()
    dendo = co.generate_dendrogram(usg)
    if len(dendo)>0 and isinstance(dendo, list):
        partition = co.partition_at_level(dendo, len(dendo) - 1 )
        met[pre+'partitions'] = dict(partition)
        met[pre+'louvain_modularity'] = co.modularity(partition, usg)
    else:
        met[pre+'louvain_modularity'] = None

    # Build the component subgraphs by hand: connected_component_subgraphs is
    # not available in newer networkx releases.
    connected_components = (usg.subgraph(c) for c in nx.connected_components(usg))
    shortest_paths = [nx.average_shortest_path_length(g) for g in connected_components if g.size()>1]
    if len(shortest_paths) > 0:
        met[pre+'avg_distance'] = max(shortest_paths)
    else:
        met[pre+'avg_distance'] = None
    return met
예제 #32
0
 def test_nodes_stay_together(self):
     """
     Two nodes grouped together at one level must remain together at the level above
     """
     g = nx.erdos_renyi_graph(500, 0.01)
     dendo = co.generate_dendrogram(g)
     parts = {}
     for level, _ in enumerate(dendo):
         parts[level] = co.partition_at_level(dendo, level)
     for level in range(len(dendo) - 1):
         current, above = parts[level], parts[level + 1]
         for com in set(current.values()):
             # Every member of `com` must map to a single community above.
             comhigher = set()
             for node, comnode in current.items():
                 if comnode == com:
                     comhigher.add(above[node])
             self.assertEqual(len(comhigher), 1)
def louvain(G):
    """Run Louvain on ``G`` and print the result.

    Uses the 'weight' edge attribute, resolution 7 and randomisation. Prints
    the node -> community partition of the last dendrogram level, then the
    same information grouped as community -> list of nodes. Returns nothing.
    """
    dendo = lvcm.generate_dendrogram(graph=G,
                                     weight='weight',
                                     resolution=7.,
                                     randomize=True)

    top_level = len(dendo) - 1
    partition = lvcm.partition_at_level(dendo, top_level)
    print(partition)

    # Invert the mapping: community id -> members.
    out = defaultdict(list)
    for node in partition:
        out[partition[node]].append(node)

    print(out)
예제 #34
0
파일: util.py 프로젝트: aganve/pyvipr
def add_louvain_communities(graph, all_levels=False, random_state=None):
    """Add Louvain community nodes to ``graph`` and link nodes to them.

    Community membership is stored in each node's 'parent' attribute and the
    added community nodes carry a 'NodeType' attribute.

    Args:
        graph: graph to decorate; it is modified in place and returned.
        all_levels: when true, add one layer of community nodes per
            dendrogram level; otherwise only the best partition is added.
        random_state: forwarded to the Louvain routines.

    Returns:
        The same ``graph`` object, with the community nodes added.
    """
    graph_communities = graph.copy().to_undirected(
    )  # Louvain algorithm only deals with undirected graphs
    if all_levels:
        # We add the first communities detected, The dendrogram at level 0 contains the nodes as keys
        # and the clusters they belong to as values.
        dendrogram = generate_dendrogram(graph_communities,
                                         random_state=random_state)
        partition = dendrogram[0]
        cnodes = set(partition.values())
        graph.add_nodes_from(cnodes, NodeType='subcommunity')
        nx.set_node_attributes(graph, partition, 'parent')

        # The dendrogram at level 1 contains the new community nodes and the clusters they belong to.
        # We change the cluster names to differentiate them from the cluster names of the first clustering
        # result. Then, repeat the same procedures for the next levels.
        # A dendrogram may consist of a single level, in which case there is
        # nothing above level 0 to add (the original raised IndexError here).
        # NOTE(review): in that case the level-0 clusters stay tagged
        # 'subcommunity' - confirm whether they should be 'community'.
        if len(dendrogram) > 1:
            cluster_child_parent = {
                key: '{0}_{1}'.format(1, value)
                for (key, value) in dendrogram[1].items()
            }
            cnodes = set(cluster_child_parent.values())
            graph.add_nodes_from(cnodes, NodeType='subcommunity')
            nx.set_node_attributes(graph, cluster_child_parent, 'parent')
        for level in range(2, len(dendrogram)):
            cluster_child_parent = dendrogram[level]
            # Prefix both sides with their level so names stay unique.
            cluster_child_parent2 = {
                '{0}_{1}'.format(level - 1, key):
                '{0}_{1}'.format(level, value)
                for (key, value) in cluster_child_parent.items()
            }
            cnodes = set(cluster_child_parent2.values())
            if level < len(dendrogram) - 1:
                graph.add_nodes_from(cnodes, NodeType='subcommunity')
            else:
                graph.add_nodes_from(cnodes, NodeType='community')
            nx.set_node_attributes(graph, cluster_child_parent2, 'parent')
    else:
        communities = best_partition(graph_communities,
                                     random_state=random_state)
        # compound nodes to add to hold communities
        cnodes = set(communities.values())
        graph.add_nodes_from(cnodes, NodeType='community')
        nx.set_node_attributes(graph, communities, 'parent')
    return graph
예제 #35
0
def test_mode():
    """Draw a small example graph, then draw it again coloured by community.

    Returns the Louvain dendrogram of the example graph.
    """
    edges = [(1, 2), (1, 3), (2, 4), (2, 3), (4, 5), (5, 3), (5, 8),
             (8, 9), (9, 7), (7, 6), (6, 3), (2, 5), (8, 7)]
    G = nx.Graph()
    G.add_edges_from(edges)

    # First figure: the plain graph.
    pos = nx.spring_layout(G)
    nx.draw_networkx_nodes(G, pos, label=True)
    partition = community.best_partition(G)
    nx.draw_networkx_edges(G, pos)
    nx.draw_networkx_labels(G, pos)
    plt.show()

    # Second figure: nodes coloured per community.
    print("drawing nodes of communites...")
    pos = nx.spring_layout(G)

    # Colours are the matplotlib cycle names C1, C2, ... in the order the
    # communities are visited.
    for color_index, com in enumerate(set(partition.values()), 1):
        list_nodes = [
            node for node, node_com in partition.items() if node_com == com
        ]
        nx.draw_networkx_nodes(G,
                               pos,
                               list_nodes,
                               node_color='C' + str(color_index))
    print("drawing community detection finished!")

    # drawing edges
    print("drawing network edges...")
    nx.draw_networkx_edges(G, pos)
    nx.draw_networkx_labels(G, pos)
    plt.show()
    return community.generate_dendrogram(G)
예제 #36
0
def calc_louvain(adj_matrix, level=0, return_c_graph=False):
    """Run Louvain community detection on an adjacency matrix.

    Args:
        adj_matrix: Adjacency matrix handed to ``nx.from_numpy_array``.
        level: Offset from the end of the dendrogram. ``0`` selects the last
            dendrogram entry (index ``len(dendro) - 1``); larger values step
            towards the first entry.
        return_c_graph: When true, also return the adjacency matrix of the
            community-level (induced) graph.

    Returns:
        A 4-tuple ``(community_list, c_level_graph, dendro, inv_dendro)``:

        * ``community_list`` -- one list of nodes per community, ordered by
          ascending community id.
        * ``c_level_graph`` -- adjacency matrix of the induced graph, or
          ``None`` when ``return_c_graph`` is false.
        * ``dendro`` -- the dendrogram returned by the louvain module.
        * ``inv_dendro`` -- for every dendrogram entry, a dict mapping each
          value of that entry to the list of keys that carry it.

    Raises:
        Exception: If ``level`` reaches past the first dendrogram entry.
    """
    nx_G = nx.from_numpy_array(adj_matrix)
    dendro = louvain.generate_dendrogram(
        nx_G, randomize=False, random_state=0)  # Maybe set randomize True

    # Convert the "offset from the end" into a dendrogram index.
    level = len(dendro) - level - 1

    if level < 0:
        raise Exception("The given Level is too deep. The maximum is: " +
                        str(len(dendro) - 1))

    communities = louvain.partition_at_level(dendro, level)

    # Group nodes by community id in one pass. The previous version derived
    # the community count from the *node key* holding the largest id, which
    # is unrelated to the actual number of communities.
    members = {}
    for node, community_id in communities.items():
        members.setdefault(community_id, []).append(node)
    community_list = [members[cid] for cid in sorted(members)]

    # Only build the induced graph when the caller actually asked for it.
    if return_c_graph:
        community_level_G = louvain.induced_graph(communities, nx_G)
        c_level_graph = nx.adjacency_matrix(community_level_G)
    else:
        c_level_graph = None

    inv_dendro = []
    for dct in dendro:
        inv_dct = {}
        for k, v in dct.items():
            inv_dct.setdefault(v, []).append(k)
        inv_dendro.append(inv_dct)

    return community_list, c_level_graph, dendro, inv_dendro
예제 #37
0
def extract_louvain_modularity(g):
    """Compute the Louvain partition and its modularity for a graph.

    Isolated nodes are removed from a copy of ``g`` before clustering, so
    they never appear in the returned partition. ``g`` itself is untouched.

    Args:
        g: Graph to analyse (must support ``copy()``).

    Returns:
        dict with two keys:

        * ``'partitions'`` -- mapping node -> community id taken from the
          last dendrogram level, or ``None`` when no partition is available.
        * ``'modularity'`` -- modularity of that partition, or ``None``.
    """
    met = {'partitions': None, 'modularity': None}

    usg = g.copy()
    # nx.isolates may be a lazy iterator over the graph; materialise it first
    # so that removing nodes does not mutate the graph while it is consumed.
    usg.remove_nodes_from(list(nx.isolates(usg)))

    # Without edges the modularity is undefined (the louvain module raises),
    # so report "no result" exactly like the empty-dendrogram case.
    if usg.number_of_edges() == 0:
        return met

    dendo = co.generate_dendrogram(usg)
    if isinstance(dendo, list) and len(dendo) > 0:
        partition = co.partition_at_level(dendo, len(dendo) - 1)
        met['partitions'] = dict(partition)
        met['modularity'] = co.modularity(partition, usg)

    return met
예제 #38
0
def preprocess(filename='f_data/partition'):
	"""Cluster the weight graph with Louvain and save every dendrogram level.

	For each level of the dendrogram the number of communities is printed
	and the partition is passed to ``saveResult`` under ``filename + level``.

	Args:
		filename: Prefix of the result files. The default restores the value
			of the assignment that had been commented out, which left
			``filename`` undefined inside this function.
	"""
	data = sio.loadmat('f_data/phishing_2013_filter.mat')
	# Currently unused: the commented-out weight computation that consumed
	# phish_data is disabled and the graph is read from the weight file.
	phish_data = data['phish']
	prefix_data = data['networks']

	G = genGraphFromFile('f_data/weight.mat')
	dendo = community.generate_dendrogram(G)

	for level in range(len(dendo)):
		partition = community.partition_at_level(dendo, level)
		# Single-argument form prints the same text on Python 2 and 3.
		print('size %d' % len(set(partition.values())))
		saveResult(filename + str(level), partition)
예제 #39
0
파일: misc.py 프로젝트: rafguns/linkpred
    def predict(self):  # pylint:disable=E0202
        """Score node pairs by shared Louvain community membership.

        Every pair of eligible nodes that falls into the same community is
        predicted to link. The Louvain dendrogram is walked level by level;
        a pair earns ``len(dendrogram) - level`` points at each level where
        it shares a community, so pairs that already share a community at
        the low levels accumulate the highest scores.

        Requires the python-louvain package. Install linkpred as follows:

        $ pip install linkpred[community]

        """
        try:
            import community
        except ImportError:
            raise ImportError("Module 'community' could not be found. "
                              "Please install linkpred as follows:\n"
                              "$ pip install linkpred[community]")

        res = Scoresheet()
        dendogram = community.generate_dendrogram(self.G)
        depth = len(dendogram)

        for level in range(depth):
            # Lower level, smaller communities, larger weight.
            weight = depth - level
            partition = community.partition_at_level(dendogram, level)

            members_by_community = defaultdict(list)
            for node, label in partition.items():
                members_by_community[label].append(node)

            for members in members_by_community.values():
                for u, v in all_pairs(members):
                    if self.eligible(u, v):
                        res[(u, v)] += weight
        return res
예제 #40
0
def gen_clusters(edges_file, resolution=dflt_resolution):
    """Print the Louvain clustering of a weighted edge list, level by level.

    For every dendrogram level this prints the partition, its modularity and,
    per community, the sorted node ids together with their group names.

    Args:
        edges_file: Path of the weighted edge list to read.
        resolution: Resolution passed to ``community.generate_dendrogram``.
            (Previously this argument was ignored and 0.25 was always used.)
    """
    with open(edges_file, "rb") as fp:
        G = nx.read_weighted_edgelist(fp)

    dendrogram = community.generate_dendrogram(G, resolution=resolution)
    len_d = len(dendrogram)
    print("{} items in dendrogram".format(len_d))

    gids2names.load_groups_file("data/groups.txt")

    for level in range(len_d):
        print()
        partition = community.partition_at_level(dendrogram, level)
        modularity = community.modularity(partition, G)
        print("partition at level {} is\n{}".format(level, pformat(partition)))
        print("modularity at level {} is {}".format(level, modularity))

        # Bucket the nodes once instead of rescanning the whole partition
        # for every community.
        members = {}
        for node, com in partition.items():
            members.setdefault(com, []).append(node)

        for com in set(partition.values()):
            list_nodes = sorted(members[com])
            print("nodes: {}".format(json.dumps(list_nodes)))
            print("    groups:")
            for gid, name in gids2names.generate_group_names(
                    group_ids_list=list_nodes):
                print("    {} {}".format(gid, name))
예제 #41
0
def _pickle_paths(screen_name):
    """Return the (json pickle, debug pickle) cache paths for a screen name."""
    base = os.path.join(os.path.dirname(__file__), PKL_PATH)
    stem = base + str(screen_name) + '.'
    return stem + PKL_FILE_EXT, stem + DBG_FILE_EXIT


def _read_pickle(path):
    """Load one pickled object from 'path', closing the file afterwards."""
    # NOTE: pickle executes arbitrary code on load; only read cache files
    # this module wrote itself.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def _write_pickle(obj, path):
    """Pickle 'obj' to 'path', closing (and thereby flushing) the file."""
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle)


def load(screen_name=None, user_id=None, force_db_update = False,
                  force_twitter_update=False, debug=False):
    '''
    Main entry point into gravitty module. Should be used by importing
    gravitty and calling gravitty.load('<your_screen_name>').

    Please see the readme at github.com/ericjeske/gravitty for mandatory setup
    instructions and api requirements.

    The load function will make every attempt to load data from cache
    sources (mongoDB) before using twitter's api. It is, however, suggested
    that multiple twitter api keys are utilized with this app to avoid rate
    limiting restrictions.

    By default, running this function will return a json object that can
    be parsed by d3.js to create a community graph. Additional information,
    including the raw twitter data, parsed twitter data, user similarity,
    community clustering dendrogram, community analytics data, community
    networkx graph, and community json object, can be returned by passing in
    debug=True.

    Also, by default, this app will create two pickled objects,
    one containing the debug data described above, the other containing the
    community json file. Subsequent calls for the same user will use this
    data to save time (and api calls).

    To override the use of pickled data, use force_db_update = True. Data
    for each follower will be pulled from mongoDB if possible, otherwise it
    will be pulled from twitter.

    To do a clean-slate download, downloading everything from twitter,
    use force_twitter_update = True.

    Args:
        screen_name: Screen name of the target user (optional if user_id
            is given).
        user_id: User id of the target user (optional if screen_name is
            given).
        force_db_update: Ignore existing pickles and rebuild the result.
        force_twitter_update: Ignore existing pickles and pass force=True to
            the data-fetching helpers.
        debug: Return the tuple of intermediate objects instead of only the
            community json.

    Returns:
        The community json object, or, when debug is true, the tuple
        (raw_df, df, df_similarity, dendrogram, data, community_graph,
        community_json).

    Raises:
        Exception: 'Please enter an id or name' when neither identifier is
            given, 'DBError' when the mongo connection fails, 'TargetError'
            when the target user's data cannot be accessed.
    '''

    if screen_name is None and user_id is None:
        raise Exception('Please enter an id or name')

    # Assume that if screen_name was not provided (only user id) then a
    # pickle has not been created. Either force flag disables the pickles.
    if screen_name is not None \
            and not force_twitter_update and not force_db_update:
        sn_file, sn_file_debug = _pickle_paths(screen_name)

        if debug and os.path.isfile(sn_file_debug):
            return _read_pickle(sn_file_debug)

        # NOTE(review): with debug=True but no debug pickle on disk this
        # still returns only the json object (behaviour kept as before).
        if os.path.isfile(sn_file):
            return _read_pickle(sn_file)

    # Use api credentials from files located in the API_PATH.
    ABS_API_PATH = os.path.join(os.path.dirname(__file__), API_PATH)
    apis = oauth_login(ABS_API_PATH)

    # Try to start up a mongo database connection to cache data in
    try:
        conn = pymongo.MongoClient("localhost", 27017)

    except pymongo.errors.ConnectionFailure:
        print('Please run mongod and re-run program')
        raise Exception('DBError')

    # Everything that talks to the database runs inside try/finally so the
    # connection is closed even when one of the steps raises.
    try:
        db = conn[DB_NAME]

        # Get the target user's data from either the screen_name or user_id
        user_data = get_user_data(db, apis[0],
                                  name = screen_name, uid = user_id,
                                  force = force_twitter_update)

        # If the user is protected (or has more than the maximum
        # followers/friends), then return an error
        if user_data is None:
            print('Was unable to access data for %s / %s'
                  % (screen_name, user_id))
            raise Exception('TargetError')

        user_info, user_tweets, followers, following, user_lists = user_data

        # Using the target user's list of followers (user ids), get the same
        # information we just got for the target user for each of its
        # followers
        raw_df = get_follower_data(db, apis, followers,
                                   force = force_twitter_update)

        # Filter the dataframe for inactive users. Then parse the raw
        # dataframe to extract the relevant features from the raw data
        df = parse_dataframe( filter_dataframe(raw_df) )

        # With the features in hand, calculate the latent similarity between
        # each set of users. See similarity.py for more detail on the
        # calculations of this similarity metric.

        # The resulting dataframe will be a square matrix indexed/columned by
        # user_id and contain the undirected edge weights between each pair
        # of users.
        df_similarity = make_similarity_dataframe(df)

        # Make an undirected graph representing the relationship between each
        # user, if any. Each node ID is the user ID, each edge weight is
        # equal to the similarity score between those two users.
        graph = make_graph(df, df_similarity)

        # Using the louvain method, find communities within the weighted
        # graph. The returned dendrogram is a list of dictionaries where the
        # values of each dictionary are the keys of the next dictionary. The
        # length of the dendrogram indicates the number of levels of
        # community clusters detected.
        dendrogram = generate_dendrogram(graph)

        # Add a final mapping to the dendrogram that maps everyone into the
        # same community. They are, after all, followers of the same user.
        dendrogram.append({k:0 for k in dendrogram[-1].values()})

        # Modify the dataframe to contain columns titled 'cid + <level>'.
        # Each column contains the community id's for that level for each
        # user. Also, this is a convenient time to calculate graph modularity
        # at each level so produce that here as well.
        df, modularity = get_community_assignment(df, graph, dendrogram)

        num_levels = len(dendrogram)

        # For each community at each level of the dendrogram, find the
        # topics, sentiment, biggest influencers, etc. for each.
        data = get_community_analytics(df, graph, num_levels,
                                       community_modularity = modularity)

        # Both the mentioned and most connected users fields from the
        # community analytics function are user ids. Turn them into screen
        # names.
        data = get_screen_names(data, 'mentioned', df, db, apis[0])
        data = get_screen_names(data, 'most_connected', df, db, apis[0])

    finally:
        # Close the database connection. It is no longer needed.
        conn.close()

    # Create a networkx graph where each node represents a community. Edges
    # represent membership into larger communities at the next level up (
    # down?) the dendrogram and have no edge weights. The data obtained in
    # the previous steps from community_analytics is loaded into the
    # attributes of each node.
    community_graph = create_community_graph(data, dendrogram)

    # Parse this graph into a json representation for use & consumption by
    # d3.js
    community_json = create_community_json(community_graph, user_info)

    # Just in case we don't have the screen name, grab it.
    if screen_name is None:
        screen_name = user_info['screen_name']

    # Pickle the objects for reuse.
    sn_file, sn_file_debug = _pickle_paths(screen_name)
    debug_bundle = (raw_df, df, df_similarity, dendrogram, data,
                    community_graph, community_json)

    _write_pickle(debug_bundle, sn_file_debug)
    _write_pickle(community_json, sn_file)

    # If debug is true, return all of the precursor objects along with the
    # json
    if debug:
        return debug_bundle

    # Otherwise return the json object
    return community_json
예제 #42
0
파일: stats.py 프로젝트: ololobus/vk-cache
    k4_cores = sorted(nx.connected_component_subgraphs(nx.k_core(graph, k = 4)), key = lambda c: c.number_of_nodes(), reverse = True)
    kmax_cores = sorted(nx.connected_component_subgraphs(nx.k_core(graph, k = max_k)), key = lambda c: c.number_of_nodes(), reverse = True)
    print 'k4 cores sizes:', map(nx.number_of_nodes, k4_cores)
    print 'kmax cores sizes:', map(nx.number_of_nodes, kmax_cores)

    k4_mod = modularity(k4_cores, graph)
    kmax_mod = modularity(kmax_cores, graph)
    print 'k4 mod', k4_mod
    print 'kmax mod', kmax_mod

    k4_wcc = wcc1(k4_cores[0], graph)
    kmax_wcc = wcc1(kmax_cores[0], graph)
    print 'k4 wcc', k4_wcc
    print 'kmax wcc', kmax_wcc

    dendro = comm.generate_dendrogram(graph)

    louvain_steps = []
    for level in range(len(dendro)):
        partition = comm.partition_at_level(dendro, level)

        clusters = {}
        for key, value in sorted(partition.iteritems()):
            clusters.setdefault(value, []).append(key)

        communities = []
        for key, value in clusters.iteritems():
            if len(value) > 0:
                communities.append(graph.subgraph(value))

        louvain_steps.append([len(set(partition.values())), modularity(communities, graph)])
예제 #43
0
# set input variables
# usage: <script> <edges-file> [resolution]
if len(sys.argv) < 2:
	sys.stderr.write("Usage: " + sys.argv[0] + " <edges-file> [resolution]\n")
	sys.exit(1)
inputFile = str(sys.argv[1])
doResolution = 1.
weighted = False
if len(sys.argv) > 2:
	doResolution = float(sys.argv[2])
sys.stderr.write("Using resolution " + str(doResolution) + ".\n")

# read data from edges input file as an undirected, int-weighted graph.
# 'data' must be a sequence of (label, type) pairs; the previous
# (('weight',int)) was just the flat tuple ('weight', int), so a weighted
# edge file could not be parsed.
G = networkx.read_edgelist(inputFile, nodetype=int, data=(('weight', int),)) # read as int-weighted
# G = networkx.read_edgelist(inputFile, nodetype=int) # read as unweighted
sys.stderr.write("Done reading.\n")

# do community detection and get dendrograph of communities
dendo = community.generate_dendrogram(G, part_init=None, resolution=doResolution, weight='weight')

# store communities at different levels (parts[level] -> node/community dict)
levels = len(dendo)
parts = {}
for level in range(0, levels):
	parts[level] = community.partition_at_level(dendo, level)

# just do plain community detection instead of nested variant
#levels = 1
#parts[0] = community.best_partition(G) # find communities

# output header to stdout
sys.stdout.write("Id")
communitySize = {}
for level in range(0, levels):