def _fit(self, parameters):
        global _current_clusters

        self.graph = _filter_graph(
            _base_graph[0].copy(),
            node_weight_threshold=self.node_weight_threshold,
            edge_weight_threshold=self.edge_weight_threshold,
            sem_weight_threshold=self.sem_weight_threshold)
        if self.communities == 0:
            self.potential_clusters = [
                sorted(clq) for clq in nx.find_cliques(self.graph)
                if len(clq) >= 3
            ]
        else:
            if _current_clusters[1][1:] == parameters[1:]:
                self.potential_clusters = [
                    sorted(clq) for clq in nx.k_clique_communities(
                        self.graph, 3, cliques=_current_clusters[0])
                ]
            else:
                self.potential_clusters = [
                    sorted(clq)
                    for clq in nx.k_clique_communities(self.graph, 3)
                ]
        _current_clusters[0] = self.potential_clusters
        _current_clusters[1] = parameters
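Most of the examples on this page target the NetworkX 1.x API, where `k_clique_communities` is exposed as `nx.k_clique_communities`; in NetworkX 2.0 and later it lives in `networkx.algorithms.community`. The snippet below is a minimal, standalone sketch of the clique-reuse pattern used above: the maximal cliques are computed once with `nx.find_cliques` and handed to `k_clique_communities` through its `cliques` argument for several values of k (the karate-club graph is purely illustrative).

import networkx as nx
from networkx.algorithms.community import k_clique_communities  # NetworkX >= 2.0 location

G = nx.karate_club_graph()                  # illustrative graph
cliques = list(nx.find_cliques(G))          # compute the maximal cliques once

# Reuse the same clique list for several k values instead of recomputing it.
for k in (3, 4, 5):
    comms = [sorted(c) for c in k_clique_communities(G, k, cliques=cliques)]
    print(k, len(comms))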
Example No. 2
def test_zachary():
    z = nx.karate_club_graph()
    # clique percolation with k=2 is just connected components
    zachary_k2_ground_truth = set([frozenset(z.nodes())])
    zachary_k3_ground_truth = set([
        frozenset([
            0, 1, 2, 3, 7, 8, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 26,
            27, 28, 29, 30, 31, 32, 33
        ]),
        frozenset([0, 4, 5, 6, 10, 16]),
        frozenset([24, 25, 31])
    ])
    zachary_k4_ground_truth = set([
        frozenset([0, 1, 2, 3, 7, 13]),
        frozenset([8, 32, 30, 33]),
        frozenset([32, 33, 29, 23])
    ])
    zachary_k5_ground_truth = set([frozenset([0, 1, 2, 3, 7, 13])])
    zachary_k6_ground_truth = set([])

    assert set(k_clique_communities(z, 2)) == zachary_k2_ground_truth
    assert set(k_clique_communities(z, 3)) == zachary_k3_ground_truth
    assert set(k_clique_communities(z, 4)) == zachary_k4_ground_truth
    assert set(k_clique_communities(z, 5)) == zachary_k5_ground_truth
    assert set(k_clique_communities(z, 6)) == zachary_k6_ground_truth
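As the comment above notes, clique percolation with k=2 reduces to connected components: every edge is a 2-clique, and two 2-cliques are adjacent whenever they share a node. A small check of that claim, assuming the NetworkX >= 2.0 import path:

import networkx as nx
from networkx.algorithms.community import k_clique_communities

z = nx.karate_club_graph()
k2 = {frozenset(c) for c in k_clique_communities(z, 2)}
components = {frozenset(c) for c in nx.connected_components(z)}
assert k2 == components   # both give the single component covering all 34 nodes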
Example No. 3
def test_overlaping_K5():
    G = nx.Graph()
    G.add_edges_from(combinations(range(5), 2))     # add a 5-clique on nodes 0-4
    G.add_edges_from(combinations(range(2, 7), 2))  # add an overlapping 5-clique on nodes 2-6
    c = list(nx.k_clique_communities(G, 4))
    assert_equal(c, [frozenset([0, 1, 2, 3, 4, 5, 6])])
    c = list(nx.k_clique_communities(G, 5))
    assert_equal(c, [])
Example No. 4
    def findClaimID(self, cliqueSize, density, mean=0.1, std=0.05):

        # STEP1: allocate subnets for fraud rings
        if self.subset is None:

            c = nx.k_clique_communities(self.G, cliqueSize)
            N = len(self.G)
            nodeList = None

            for count, n in enumerate(list(c)):

                print 'Community = ', count, ' Size = ', len(n)
                if nodeList is None:
                    nodeList = n
                elif len(n) > (mean - std) * N and len(n) < (mean + std) * N:
                    nodeList = n

            self.subset = list(nodeList)

        if self.subset:
            self.fraudRingModifier(density=density)

        # STEP2: calculate network KPIs
        c = nx.k_clique_communities(self.G, cliqueSize)
        maximum = 0.0

        for count, n in enumerate(list(c)):

            N_claim = 0.
            N_part = 0.

            for NI in n:
                if self.G.node[NI]['label'].split('_')[0] == 'ClaimID':
                    N_claim += 1.0
                else:
                    N_part += 1.0

            for NI in n:

                ratio = N_claim / N_part
                self.G.node[NI]['modularityClass'] = count + 1
                self.G.node[NI]['fraudScore'] = ratio

                if maximum < ratio:
                    maximum = ratio

        # STEP3: assign fraud scores
        c = nx.k_clique_communities(self.G, cliqueSize)

        for count, n in enumerate(list(c)):

            for NI in n:
                self.G.node[NI]['fraudScore'] /= maximum
                self.G.node[NI]['fraudScore'] *= 100
                self.G.node[NI]['fraudScore'] = int(
                    self.G.node[NI]['fraudScore'])
Example No. 5
def report_communities(graph: networkx.Graph):
    communities = {}
    for community_id, community in enumerate(
            networkx.k_clique_communities(graph, 3)):
        for node in community:
            communities[node] = community_id + 1

    # Group actors from same communities together
    community_to_actors = {}
    for key, val in communities.items():
        if val not in community_to_actors:
            community_to_actors[val] = []
        community_to_actors[val].append(key)

    # Sort based on the length of the list of actors
    community_to_actors_sorted = sorted(community_to_actors.items(),
                                        key=lambda element: len(element[1]),
                                        reverse=True)

    print("=" * 80)
    print("COMMUNITIES:")
    for community in community_to_actors_sorted[:10]:
        print("ID {}, {} actors: {}".format(community[0], len(community[1]),
                                            ", ".join(community[1])))
    print("=" * 80)

    # Add as attribute to graph
    for actor, community_id in communities.items():
        graph.node[actor]['community_id'] = community_id
def output_graph_stats(g):
    """
	Clique percolation, along with some other NetworkX statistics
	about the generated graph that may be useful.
	"""
    stat_file = open(OUTPUT_STATS, 'w')
    stat_file.write("NOTE: graph is treated as an unweighted graph" + "\n\n")
    stat_file.write(str(nx.info(g)) + "\n\n")
    stat_file.write("TRANSITIVITY: " + str(nx.transitivity(g)) + "\n\n")
    clust_coeffs = nx.clustering(g)
    stat_file.write("NODES WITH CLUST COEFF = 1: " + "\n")
    for node in clust_coeffs:
        if clust_coeffs[node] == 1.0:
            stat_file.write(node + " " + str(g.neighbors(node)) + "\n")
    stat_file.write("AVG CLUSTERING COEFFICIENT: " +
                    str(nx.average_clustering(g)) + "\n\n")
    stat_file.write("DEGREE HISTOGRAM: " + str(nx.degree_histogram(g)) +
                    "\n\n")
    stat_file.write("NODES WITH HIGHEST DEGREE CENTRALITY: " + "\n")
    stat_file.write(
        str(
            sorted(nx.degree_centrality(g).items(),
                   key=operator.itemgetter(1),
                   reverse=True)[:5]) + "\n\n")
    stat_file.write("4-CLIQUE COMMUNITIES (clique percolation): " + "\n")
    for clique in nx.k_clique_communities(g, 4):
        stat_file.write(str(clique) + "\n")
    stat_file.write("\nMAXIMAL CLIQUES: " + "\n")
    for clique in nx.find_cliques(g):
        if len(clique) >= 3:
            stat_file.write(str(clique) + "\n")
Example No. 7
File: sna.py Project: dgawlik/ed
    def all_users_group_evolution(self):

        slots = []
        for i in range(1,7):
            G = self._graph_from_cursor('all_posts_s%d' % i)

            communities = nx.k_clique_communities(G, 3)
            communities = sorted(communities, key = lambda c: len(c), reverse = True)[:10]
            slots.append(communities)


        for i in range(5):
            comm1 = slots[i]
            comm2 = slots[i+1]

            # remapped = []
            # for c1 in comm1:
            #     remapped.append(max(comm2, key=lambda c2: len(set(c1) & set(c2))))

            # slots[i+1] = remapped

        cover = []
        for i in range(1,6):
            row = []
            for j in range(10):
                isl = len(set(slots[i-1][j]) & set(slots[i][j]))
                n = len(slots[i-1][j])
                f = isl*100/n
                row.append(f)
            cover.append(row)

        for i in range(5):
            print cover[i]
def user_clique(user):
    
    G = get_graph(user)
    c = maxrank(user)
    
    print str(len(G.nodes())) + " nodes for user " + str(user)

    if user in [345,0,21869,18844]:
        return (user, [[c]])
    # do not calculate for large graphs (it takes too long)
    if len(G.nodes()) > tooManyNodesThreshold:
        return (user, [[c]])

    # find communities using k_clique_communities()
    listOfCircles = []
    kCliqueComunities = list(nx.k_clique_communities(G,cliqueSize))
    for community in kCliqueComunities:
        # leave only relatively large communities
        if len(community) >= tooLittleFriendsInCircleThreshold:
            listOfCircles.append(list(community))

    # if no prediction was created, use max pagerank friend
    if len(listOfCircles) == 0:
        return (user, [[c]])
    else:
        return (user, listOfCircles)
Example No. 9
def plotUnweightedCommunities(G, k_clique, n_nodes, iw):

    cls = nx.find_cliques(G)
    communities = list(nx.k_clique_communities(G, k_clique, cliques=cls))

    print(len(communities))

    pos = nx.graphviz_layout(G)  # positions for all nodes

    plt.figure(figsize=(12, 12))

    #colors = ["green","yellow","red","blue","pink","orange","gray","brown","black","white","purple","green","yellow","red","blue","pink","orange","gray","brown","black","white","purple"]

    #	colors = ["#ff0000","#ff8000","#ffbf00","#ffff00","#bfff00","#80ff00","#40ff00","#00ff00", "#00ff40", "#00ff80", "#00ffbf", "#00ffff", "#00bfff", "#0080ff", "#0040ff", "#0000ff", "#4000ff", "#8000ff", "#bf00ff", "#ff00ff", "#ff00bf", "#ff0080", "#ff0040","#ff0000","#ff8000","#ffbf00","#ffff00","#bfff00","#80ff00","#40ff00","#00ff00", "#00ff40", "#00ff80", "#00ffbf", "#00ffff", "#00bfff", "#0080ff", "#0040ff", "#0000ff", "#4000ff", "#8000ff", "#bf00ff", "#ff00ff", "#ff00bf", "#ff0080", "#ff0040"]

    for i in range(len(communities)):
        nx.draw_networkx_nodes(G,
                               pos,
                               nodelist=list(communities[i]),
                               node_color=colors[i % len(colors)])

    nx.draw_networkx_edges(G, pos, width=0.5)
    # labels
    #nx.draw_networkx_labels(G,pos,font_size=10,font_family='sans-serif')

    plt.axis('off')
    plt.savefig("./communities/unweighted_" + "comm_" + "w" + str(iw) + "k" +
                str(k_clique) + ".png")  # save as png
    plt.close()
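The plotting examples above and below rely on `nx.graphviz_layout`, which was a top-level function in NetworkX 1.x and requires pygraphviz; in NetworkX 2.x+ the equivalent call is `nx.nx_agraph.graphviz_layout` (or `nx.nx_pydot.graphviz_layout`), and `nx.spring_layout` works as a dependency-free fallback. A hedged sketch of the same node-per-community colouring against the current API, on an illustrative graph:

import matplotlib.pyplot as plt
import networkx as nx
from networkx.algorithms.community import k_clique_communities

G = nx.karate_club_graph()                       # illustrative graph
communities = list(k_clique_communities(G, 3))
try:
    pos = nx.nx_agraph.graphviz_layout(G)        # needs pygraphviz
except ImportError:
    pos = nx.spring_layout(G, seed=42)           # dependency-free fallback

colors = ["green", "yellow", "red", "blue", "pink", "orange", "gray"]
for i, comm in enumerate(communities):
    nx.draw_networkx_nodes(G, pos, nodelist=list(comm),
                           node_color=colors[i % len(colors)])
nx.draw_networkx_edges(G, pos, width=0.5)
plt.axis('off')
plt.savefig("communities_example.png")
plt.close()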
Example No. 10
def plotWeightedCommunities(G, W_lim, k_clique, n_nodes):
    for i in range(0, n_nodes):
        for j in range(i, n_nodes):
            if (i != j):
                print(i, j)
                if (G[i][j]['weight'] < W_lim):
                    G.remove_edge(i, j)

    cls = nx.find_cliques(G)
    communities = list(nx.k_clique_communities(G, k_clique, cliques=cls))

    print(len(communities))

    pos = nx.graphviz_layout(G)  # positions for all nodes

    plt.figure(figsize=(12, 12))

    #colors = ["green","yellow","red","blue","pink","orange","gray","brown","black","white","purple","green","yellow","red","blue","pink","orange","gray","brown","black","white","purple"]

    for i in range(len(communities)):
        nx.draw_networkx_nodes(G,
                               pos,
                               nodelist=list(communities[i]),
                               node_color=colors[i])

    nx.draw_networkx_edges(G, pos, width=0.5)
    # labels
    #nx.draw_networkx_labels(G,pos,font_size=10,font_family='sans-serif')

    plt.axis('off')
    plt.savefig("comm_w_" + str(W_lim) + "k" + str(k_clique) +
                ".png")  # save as png
    plt.close()
Example No. 11
def singleDetection(twitter_api, group):
    #handle a single group

    #delete repeated user ids
    group=list(set(group))

    G=nx.Graph()

    #construct graph for the group by finding all the edges
    for user_id in group:
        friends_ids, followers_ids = get_friends_followers_ids(twitter_api, user_id=user_id)
        friends_ids_str=[str(friend_id) for friend_id in friends_ids]
        followers_ids_str=[str(follower_id) for follower_id in followers_ids]
        vertices = list(set(friends_ids_str) & set(group) & set(followers_ids_str)) #find nodes connected to current node in the group
        edges=[(str(user_id), vertex)for vertex in vertices]
        G.add_edges_from(edges)

    # find communities using CPM (clique percolation method)
    c=[]
    k=0
    for size in range(3,6):
        c_original=list(nx.k_clique_communities(G, size))
        if len(c_original)>0:
            c_listed=[list(froz) for froz in c_original]
            c = c_listed  # if communities are found with a larger k, drop the previous ones
            k=size
        else:
            break

    return G.nodes(), G.edges(), c
Example No. 12
    def cliqueData(self, cnumber, data, header, sg, k):
        for n in data:
            data[n] += [0, 0, 0]
        com = list(nx.k_clique_communities(sg, k))
        header += [
            str(k) + '_clique_size',
            str(k) + '_clique_edges',
            str(k) + '_clique_density'
        ]
        for c in com:
            count = self.edgesCount(list(c))
            for n in c:
                if len(c) > data[n][-3]:
                    data[n][-3] = len(c)
                    data[n][-2] = count
                    data[n][-1] = count / (data[n][-3] * data[n][-3] -
                                           data[n][-3])
        header += [str(k) + '_min_edges', str(k) + '_min_edges_ratio']
        for n in data:
            if data[n][-1] > 0:
                data[n].append(np.ceil(data[n][-3] * cnumber[n] / 2))
                data[n].append(data[n][-3] / data[n][-1])
            else:
                data[n] += [0, 0]

        return data, header
Example No. 13
def k_comm(Gn):
    comm_list_G = list(nx.k_clique_communities(Gn, 3))  # k=3
    comm_list = []
    for item in comm_list_G:
        item = map(int, item)
        comm_list.append(list(item))
    return comm_list
Example No. 14
def plotWeightedCommunities(G, W_lim, k_clique, n_nodes):
	for i in range(0,n_nodes):
		for j in range(i,n_nodes):
			if(i!=j):
				if(G[i][j]['weight'] < W_lim):
					G.remove_edge(i,j)

	cls = nx.find_cliques(G)
	communities = list(nx.k_clique_communities(G,k_clique ,cliques = cls))

	print(len(communities))

	pos=nx.graphviz_layout(G) # positions for all nodes


	plt.figure(figsize=(12,12))

	#colors = ["green","yellow","red","blue","pink","orange","gray","brown","black","white","purple","green","yellow","red","blue","pink","orange","gray","brown","black","white","purple"]

	for i in range(len(communities)):
		nx.draw_networkx_nodes(G,pos,nodelist=list(communities[i]),node_color=colors[i])

	nx.draw_networkx_edges(G,pos,width=0.5)
			# labels
	nx.draw_networkx_labels(G,pos,font_size=10,font_family='sans-serif')

	plt.axis('off')
	plt.savefig("comm_w_"+str(W_lim)+"k"+str(k_clique)+".png") # save as png
	plt.close()
Example No. 15
File: app.py Project: dgawlik/ed
def popular_group_centralities():
    path = '../sna/all_posts_s5.graphml'
    G = nx.read_graphml(path)

    cliques =  nx.k_clique_communities(G, 3)
    cliques = list(cliques)

    popular_clique = sorted(cliques, key=lambda x:len(x), reverse=True)[0]

    G = G.subgraph(popular_clique)

    degree = nx.degree_centrality(G)
    pagerank = nx.pagerank(G)
    closeness = nx.closeness_centrality(G)
    betweeness = nx.betweenness_centrality(G)

    sd = pandas.Series(degree)
    sp = pandas.Series(pagerank)
    sc = pandas.Series(closeness)
    sb = pandas.Series(betweeness)

    print sd.describe()
    print sp.describe()
    print sc.describe()
    print sb.describe()

    sd.plot.hist(bins=100).get_figure().savefig('../sna/pg_degree.png')
    sp.plot.hist(bins=100).get_figure().savefig('../sna/pg_pagerank.png')
Example No. 16
def plotUnweightedCommunities(G, k_clique, n_nodes,iw):

	cls = nx.find_cliques(G)
	communities = list(nx.k_clique_communities(G,k_clique ,cliques = cls))

	print(len(communities))

	pos=nx.graphviz_layout(G) # positions for all nodes


	plt.figure(figsize=(12,12))

	#colors = ["green","yellow","red","blue","pink","orange","gray","brown","black","white","purple","green","yellow","red","blue","pink","orange","gray","brown","black","white","purple"]

#	colors = ["#ff0000","#ff8000","#ffbf00","#ffff00","#bfff00","#80ff00","#40ff00","#00ff00", "#00ff40", "#00ff80", "#00ffbf", "#00ffff", "#00bfff", "#0080ff", "#0040ff", "#0000ff", "#4000ff", "#8000ff", "#bf00ff", "#ff00ff", "#ff00bf", "#ff0080", "#ff0040","#ff0000","#ff8000","#ffbf00","#ffff00","#bfff00","#80ff00","#40ff00","#00ff00", "#00ff40", "#00ff80", "#00ffbf", "#00ffff", "#00bfff", "#0080ff", "#0040ff", "#0000ff", "#4000ff", "#8000ff", "#bf00ff", "#ff00ff", "#ff00bf", "#ff0080", "#ff0040"]

	for i in range(len(communities)):
		nx.draw_networkx_nodes(G,pos,nodelist=list(communities[i]),node_color=colors[i%len(colors)])

	nx.draw_networkx_edges(G,pos,width=0.5)
			# labels
	nx.draw_networkx_labels(G,pos,font_size=10,font_family='sans-serif')

	plt.axis('off')
	plt.savefig("./communities/unweighted_"+"comm_"+"w"+str(iw)+"k"+str(k_clique)+".png") # save as png
	plt.close()
Example No. 17
def test_isolated_K5():
    G = nx.Graph()
    G.add_edges_from(combinations(range(0, 5), 2))  # Add a five clique
    G.add_edges_from(combinations(range(5, 10), 2))  # Add another five clique
    c = list(nx.k_clique_communities(G, 5))
    assert_equal(set(c),
                 set([frozenset([0, 1, 2, 3, 4]),
                      frozenset([5, 6, 7, 8, 9])]))
Example No. 18
    def graph_communities(self):
        communities = nx.k_clique_communities(self.G, 6)
        communities_index = {}
        for i, group in enumerate(communities):
            for member in group:
                communities_index[member] = i + 1

        return communities_index
Example No. 19
def kcliques_to_html(G):
    kcliques = list(networkx.k_clique_communities(G, 2))
    #pdb.set_trace()
    kcliques_colors = [random.randint(0,1000000)*len(l) for l in kcliques]
    for clique in kcliques:
        color = kcliques_colors[kcliques.index(clique)]
        for node in clique:
            G.node[node]['kclique'] = color
Example No. 20
def clique(gr):
    G=g_load(gr)
    li=list(list(networkx.k_clique_communities(G,2))[0])
    
    mv=majority_vote.list_majority(li,G)
    #print(mv)
    #s=sorted(mv.items(),key=operator.itemgetter(1),reverse=True)
    return mv
Example No. 21
    def getCliques(self, G='default', nclique=4):

        if G == 'default':
            G = self.G

        cliques = list(nx.k_clique_communities(G, nclique))

        return cliques
Example No. 22
def get_coalesced_communities(g, no_overlap=False):

    average_clique_size = int(get_average_clique_size(g))
    communities = map(lambda c: Community(c), nx.k_clique_communities(g, average_clique_size))
    communities = coalesce_communities(communities, .7)

    communities2 = map(lambda c: Community(c), nx.k_clique_communities(g, 3))
    communities2 = coalesce_communities(communities2, .7)

    communities = communities + filter(lambda c: len(c.members) <= 10, communities2)
    communities = coalesce_communities(communities, .7)
    communities = filter(lambda c: len(c.members) > 1, communities)

    if not no_overlap:
        return communities

    members = set()
    overlapping_subs = set()

    for c in communities:
        for s in c.members:
            if s in members:
                overlapping_subs.add(s)
            else:
                members.add(s)

    for overlapped in overlapping_subs:
        best_weight = 0
        best_comm = None

        overlapped_comms = filter(lambda c: overlapped in c.members, communities)
        for community in overlapped_comms:
            source = overlapped
            weight = 0
            for target in community.members:
                if g.has_edge(source, target):
                    weight += g[source][target]['weight']

            if weight > best_weight:
                best_weight = weight
                best_comm = community

        for c in overlapped_comms:
            c.members.remove(overlapped)
        best_comm.members.add(overlapped)
    return communities
Example No. 24
def nxCommunity():
    import networkx as nx
    import matplotlib.pyplot as plt
    G = nx.connected_caveman_graph(6, 4)
    # compute the k-clique communities for k=5
    c = list(nx.k_clique_communities(G, 5))
    print c
    nx.draw(G)
    plt.show()
def main():
    # reads from file and building the LastFm network
    artists, net = ma.read_data()
    listening = ma.get_node_listening(artists)
    print_vectors(artists)
    communities = list(nx.k_clique_communities(net, 5))
    community_statistics(communities, 'k-clique')

    community_listening(communities, listening, artists, 'k-clique')
Example No. 26
def report_communities(graph):
    communities = {node: cid + 1 for cid, community in enumerate(networkx.k_clique_communities(graph, 3)) for node in
                   community}

    pos = networkx.circular_layout(graph)
    networkx.draw(graph, pos, font_size=8,
                  labels={v: str(v) for v in graph},
                  cmap=plt.get_cmap("rainbow"),
                  node_color=[communities[v] if v in communities else 0 for v in graph])
    plt.savefig("communities.png")
 def find_k_cliques(self, k = 3):
     for kc in nx.k_clique_communities(self.G, k):
         self.k_cliques.append(set(kc))
     self.k_clique_groups = self.find_common_subsets(self.k_cliques)
     if self.verbose:
         print 'K-CLIQUES' 
         print 'Found %s k-cliques' %(len(self.k_cliques))
         print 'Found %s k-clique groups' %(len(self.k_clique_groups))
         
     return len(self.k_clique_groups)
Example No. 28
def test_zachary():
    z = nx.karate_club_graph()
    # clique percolation with k=2 is just connected components
    zachary_k2_ground_truth = set([frozenset(z.nodes())])
    zachary_k3_ground_truth = set([frozenset([0, 1, 2, 3, 7, 8, 12, 13, 14, 
                                              15, 17, 18, 19, 20, 21, 22, 23, 
                                              26, 27, 28, 29, 30, 31, 32, 33]),
                                   frozenset([0, 4, 5, 6, 10, 16]),
                                   frozenset([24, 25, 31])])
    zachary_k4_ground_truth = set([frozenset([0, 1, 2, 3, 7, 13]),
                                   frozenset([8, 32, 30, 33]),
                                   frozenset([32, 33, 29, 23])])
    zachary_k5_ground_truth = set([frozenset([0, 1, 2, 3, 7, 13])])
    zachary_k6_ground_truth = set([])

    assert set(k_clique_communities(z, 2)) == zachary_k2_ground_truth
    assert set(k_clique_communities(z, 3)) == zachary_k3_ground_truth
    assert set(k_clique_communities(z, 4)) == zachary_k4_ground_truth
    assert set(k_clique_communities(z, 5)) == zachary_k5_ground_truth
    assert set(k_clique_communities(z, 6)) == zachary_k6_ground_truth
Example No. 29
def spatial_major(in_folder, cliqueThresh):
    for file in os.listdir(in_folder):
        if file != '.DS_Store':  #weird MAC thing
            path = in_folder + file
            G = getJsonNet(path)
            G = nx.Graph(G)  #can't do cliques on directed nets
            if G.order() > 0:
                n = nx.graph_clique_number(G)
                if n > cliqueThresh:
                    print file
                    for c in nx.k_clique_communities(G, n):
                        print list(c)
Example No. 30
def k_clique_CD(graph, cut_str, k_range):
    print '########\tK-CLIQUE CD WITH K RANGE = ' + str(k_range) + '\t########'
    # num_cliques = nx.number_of_cliques(actor_network_cut3)
    # print len(num_cliques)
    for k in k_range:
        print '\n########\t'+str(k)+'-CLIQUE CD '+cut_str+' START\t########'
        output = nx.k_clique_communities(graph, k)
        output_communities_list = list(map(list, output))  # to convert all the communities into lists
        print '########\t'+str(k)+'-CLIQUE CD '+cut_str+' COMPLETE\t########'
        output_file = OUTPUT_DIRECTORY_KCLIQUE+"/kclique_actor_"+str(k)+"_"+cut_str+".txt"
        print '> number of communities found: ' + str(len(output_communities_list))
        serialize_communities(output_communities_list, output_file)
Example No. 31
def make_modules_multik(graph, k=None):
    """make modules with networkx k-clique communities and annotate network"""
    if k is None:
        k = [2, 3, 4, 5, 6]
    communities = dict()
    for k_val in list(k):
        cliques = list(nx.k_clique_communities(graph, k_val))
        cliques = [list(i) for i in cliques]
        communities[k_val] = cliques
        for i, clique in enumerate(cliques):
            for node in clique:
                graph.node[node]['k_' + str(k_val)] = i
    return graph, {k: list(v) for k, v in communities.iteritems()}
Example No. 32
def k_clique_analysis(G,k_list,out_path):
	for k in k_list:
		c = list(nx.k_clique_communities(G, k))
		c = list(map(list,c))
		out = open(out_path + str(k) + "_clique.dat","w")
		for community in c:
			out.write("%d\t[" % c.index(community))
			for node in community:
				out.write('"%s",' % node)
			out.write("]\n")
		out.close()
def main():
    # reads from file and building the LastFm network
    artists, net = ma.read_data()
    listening = ma.get_node_listening(artists)

    genre_vectors, artist_vector = build_vectors(artists)

    communities_kcliques = list(nx.k_clique_communities(net, 5))
    community_statistics(communities_kcliques, 'k-clique')
    community_listening(communities_kcliques, listening, artists, 'k-clique', genre_vectors, artist_vector)

    communities_demon = read_demon()
    community_statistics(communities_demon, 'demon')
    community_listening(communities_demon, listening, artists, 'demon', genre_vectors, artist_vector)
Example No. 34
def findCommunites(threshold=0.5, sector=None, k=5, force=False):
	th = re.sub(r'([0-9]*)\.([0-9]*)',r'\1\2',str(threshold))
	if sector != None:
		graphInFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_graph_nx_"+sector+"_th"+th+".xml"
		graphOutFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_communities_nx_"+sector+"_th"+th+"_k"+str(k)+".xml"
		outFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_communities_nx_"+sector+"_th"+th+"_k"+str(k)+".csv"
	else:
		graphInFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_graph_nx_th"+th+".xml"
		graphOutFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_communities_nx"+"_th"+th+"_k"+str(k)+".xml"
		outFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_communities_nx"+"_th"+th+"_k"+str(k)+".csv"

	print "reading graph from file: ", graphInFilename
	print "writing graph with community info to file: ", outFilename
	print "writing community details in csv format to file: ", outFilename

	if force or not isfile(graphOutFilename):
		g = nx.read_graphml(graphInFilename)
		#freq = findFreqOfCliquesInGraph(g)
		#plotHistFromDict(freq)
		
		comm = nx.k_clique_communities(g, k)
		communities = []
		for c in comm:
			communities.append(c) 
		
		numCommunities = len(communities)
		print "number of communities found: ", numCommunities
		
		colors = range(numCommunities)

		i = 0
		for c in communities:
			for v in c:
				g.node[v]['cluster'] = colors[i] + 1
			i += 1

		nx.write_graphml(g, graphOutFilename)
			
		import csv
		with open(outFilename, "wb") as f:
			writer = csv.writer(f, delimiter='|', quotechar="'", quoting=csv.QUOTE_MINIMAL)
			writer.writerow(["sector", "symbol", "name", "cluster"])
			for v in g:
				writer.writerow([g.node[v]['sector'], g.node[v]['symbol'], g.node[v]['name'], g.node[v]['cluster']])

		results = PROCESSED_FILE_LOC + "results.csv"
		with open(results, "a") as f1:
			f1.write(str(dt.datetime.today()) + "," + outFilename + "," + str(numCommunities) + "," + str(calculateModularity(graphOutFilename)) + "\n")

		drawGraph(graphOutFilename, "gt")
def predict_user(G,
                 cliqueSize=5,
                 tooLittleFriendsInCircleThreshold=10,
                 #tooManyNodesThreshold=220
                 ):
    # find communities using k_clique_communities()
    listOfCircles = []
    kCliqueComunities = list(nx.k_clique_communities(G,cliqueSize))
    for community in kCliqueComunities:
        # leave only relatively large communities
        if len(community) >= tooLittleFriendsInCircleThreshold:
            listOfCircles.append(list(community))

    return listOfCircles
Example No. 36
def DetectionComNX(graphe,path):

    import networkx as nx

    graph=nx.read_edgelist(path+graphe)
    kcliques=nx.k_clique_communities(graph,3,nx.find_cliques(graph))
    c=list(kcliques)
    commClus={}
    for indx,val in enumerate(c):
        temp=[]
        for j in val:
            temp.append(int(j.replace("n","")))
        commClus[indx]=temp
    return commClus
Example No. 37
def calculateModularity():
    #very slow!, exp(N) complexity
    community_growth = []
    communities = []
    max_cliques = 20
    for k in tqdm(range(max_cliques),desc='Running k-clique modularity algorithm'):
        communities.append(list(nx.k_clique_communities(G_gn,k+2)))
    community_growth = [len(list(x)) for x in communities]
    plt.plot([k for k in range(2,max_cliques+2)],community_growth,'*')
    plt.xlabel('clique size (k)')
    plt.ylabel('Qty Clique Communities')
    plt.title('Number of k-clique communities vs clique size via percolation method')
    plt.grid()
    plt.show()
    
    return [k for k in range(2,max_cliques+2)],communities
Example No. 38
def get_comunidades(grafico):
    comunidades = list(nx.k_clique_communities(grafico.to_undirected(), 3, nx.find_cliques(grafico.to_undirected()))) # if everything went well, the member sets contain only user ids, with no stray integers added
    g = nx.Graph() # now build a fresh graph to visualize everything

    for i in comunidades:
        g.add_nodes_from(i)

    for i in comunidades:
        g.add_edges_from(list(permutations(i,2)))

    pos = nx.spring_layout(g)
    plt.axis('off')
    nx.draw_networkx(g, pos)
    plt.show()

    return comunidades
Example No. 39
def make_modules(graph, k=3, prefix="module"):
    """make modules with networkx k-clique communities and annotate network"""
    premodules = list(nx.k_clique_communities(graph, k))
    # reverse modules so observations will be added to smallest modules
    premodules = list(enumerate(premodules))
    premodules.reverse()

    modules = dict()
    seen = set()
    for i, module in premodules:
        # process module
        module = module-seen
        seen = seen | module
        modules[prefix+"_"+str(i)] = module
        for node in module:
            graph.node[node][prefix] = i
    return graph, modules
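A minimal usage sketch for `make_modules`, under the same NetworkX 1.x assumptions as the function itself (the top-level `nx.k_clique_communities` and the `graph.node` attribute dict, both removed in later releases); the two overlapping 4-cliques are purely illustrative:

from itertools import combinations
import networkx as nx

G = nx.Graph()
G.add_edges_from(combinations(range(4), 2))     # 4-clique on nodes 0-3
G.add_edges_from(combinations(range(3, 7), 2))  # 4-clique on nodes 3-6, overlapping at node 3

annotated, modules = make_modules(G, k=3, prefix="module")
for name, members in sorted(modules.items()):
    print(name, sorted(members))                # node 3 ends up in only one module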
def week4():
    path = "D:\Dropbox\PhD\My Work\Algorithms\@Machine Learning\Lectures\Social Network Analysis\Week 4_Community Structure\wikipedia.gml"
    wiki = nx.read_gml(path)
    wiki = wiki.to_undirected()
    
    # cliques
    cid, cls = max(nx.node_clique_number(wiki).iteritems(), key=operator.itemgetter(1))
    print 'clique', cid, ' size:', cls
    
    # k-cores
    kcs = nx.k_core(wiki)
    print 'k-core size:', len(kcs.node)
    
    # community 
    cs = list(nx.k_clique_communities(wiki, 2))
    ratio = (len(cs[0]) + 0.0) / len(wiki.node)
    print 'community ratio:', ratio
Example No. 41
def get_statistics():
    """
    uses data statistics to find the most important papers in the collection.
    """
    
    
    with open(MAIN_FOLDER + "network.pkl", "r") as f:
        network = pickle.load(f)

    betweenness_dict = networkx.betweenness_centrality(network.to_undirected())
    sorted_betweenness = sorted(betweenness_dict.items(), key=operator.itemgetter(1), reverse=True)
    betweenness = [x[0] for x in sorted_betweenness[:5]]

    pagerank_dict = networkx.pagerank(network.to_undirected())
    sorted_pagerank = sorted(pagerank_dict.items(), key=operator.itemgetter(1), reverse=True)
    pagerank = [x[0] for x in sorted_pagerank[:5]]

    hits_dict = networkx.hits(network.to_undirected())
    sorted_hits = sorted(hits_dict[0].items(), key=operator.itemgetter(1), reverse=True)
    hits = [x[0] for x in sorted_hits[:5]]

    in_degree_dict = network.in_degree()
    sorted_in_degree = sorted(in_degree_dict.items(), key=operator.itemgetter(1), reverse=True)
    in_degree = [x[0] for x in sorted_in_degree[:5]]

    community_dict = []
    for k in xrange(20):
        community_dict += list(networkx.k_clique_communities(network.to_undirected(), 21 - k))
        if community_dict:
            break

    modules = []
    for index, community in enumerate(community_dict):
        modules.append([])
        for p in community:
            modules[index].append(p)

    statistics = {'in_degree': in_degree, 'betweenness': betweenness, 'hits': hits, 'pagerank': pagerank,
                  'modules': modules}

    with open(MAIN_FOLDER + "statistics.pkl", "wa") as f:
        pickle.dump(statistics, f)
Example No. 42
def main():
    """docstring for main"""
    g = build_graph(sys.argv[1])
    global nodes_
    global nrows_, ncols_
    cliques = nx.k_clique_communities(g_, 5)
    img = np.zeros(shape=(nrows_, ncols_), dtype=np.uint16)
    for i, c in enumerate(cliques):
        pos = [nodes_[x] for x in c]
        # pos = np.array( pos )
        print(pos)
        if len(pos) < 4:
            continue
        for p in pos:
            cv2.circle(img, p, 1, 10 * i)
            # cv2.putText( img, str(i), p, cv2.FONT_HERSHEY_COMPLEX, 0.1, i )
        # cv2.polylines( img, np.int32( [ pos ] ), False, i )
    plt.imshow(img, cmap='gray', interpolation='none', aspect='auto')
    plt.colorbar()
    plt.savefig('corr.png')
Example No. 44
def clusterGraph(G):
    print("Clustering and Colorizing Graph")
    COMMUNITY_SIZE = 10  # minimum clique size k for clique percolation
    c = list(nx.k_clique_communities(G, COMMUNITY_SIZE))
    usedColors = list()
    for cluster in c:
        goldenRatio = 0.618033988749895
        h = random.random()
        color = "0"
        while color in usedColors or color == "0":
            h += goldenRatio
            h %= 1
            rgb = colorsys.hsv_to_rgb(h, 0.5, 0.95)
            color = "#{0:02x}{1:02x}{2:02x}".format(int(rgb[0] * 255),
                                                    int(rgb[1] * 255),
                                                    int(rgb[2] * 255))
        usedColors.append(color)
        for nodeID in cluster:
            print("Giving", nodeID, "Color", color)
            G.node[nodeID]['color'] = color
    print("Used Colors:", usedColors)
    return G
Example No. 45
def printCommunities(graph):
    communities = {}
    for community_id, community in enumerate(nx.k_clique_communities(graph, 3)):
        for node in community:
            communities[node] = community_id + 1

    community_actors = {}
    for key, val in communities.items():
        if val not in community_actors:
            community_actors[val] = []
        community_actors[val].append(key)

    print("\nCommunities:")
    print("\t5 biggest communities size: ", end="")
    for comm in sorted(community_actors.items(), key= lambda item: len(item[1]), reverse=True)[:5]:
        print(len(comm[1]), end=",")
    print("\n\tMembers of biggest community:")
    for comm in sorted(community_actors.items(), key=lambda item: len(item[1]), reverse=True)[:1]:
        print("\t{}".format(comm[1]))

    for actor, community_id in communities.items():
        graph.node[actor]['community_id'] = community_id
def get_ego_kclique_communities(ego):
    if ego in [5881, 12800]:
        print 'In get_ego_kclique_communities, skipping ego', ego
        return {}

    ego_kcc_dmp = join(DATA_DIR, 'cliques', 'kcc_%s.zip'%ego)
    if os.path.exists(ego_kcc_dmp):
        with zipfile.ZipFile(ego_kcc_dmp, mode='r') as zf:
            ccs = json.loads(zf.read('files1.json'))
    else:
        ego_cliques = get_ego_cliques(ego)
        print 'Processing k-clique communities: nx.find_cliques, ego:', ego
        G = load_ego_graph(ego)
        ccs = [list(cc) for cc in nx.k_clique_communities(G, 6, cliques=ego_cliques)]
        try:
            import zlib
            compression = zipfile.ZIP_DEFLATED
        except:
            compression = zipfile.ZIP_STORED
        json_rslt = json.dumps(ccs, ensure_ascii=False, indent=True)
        with zipfile.ZipFile(ego_kcc_dmp, mode='w') as zf:
            zf.writestr('files1.json', json_rslt, compress_type=compression)
    return ccs
Example No. 47
File: sna.py Project: dgawlik/ed
    def all_users_timeslots(self):

        stats = []
        for i in range(1,7):
            G = self._graph_from_cursor('all_posts_s%d' % i)

            for nd in G.nodes():
                G.node[nd]['color'] = 0

            j = 1
            cl = []
            for clique in nx.k_clique_communities(G, 3):
                cl.append(clique)
                for nd in clique:
                    G.node[nd]['color'] = j
                j += 1

            n = len(G.nodes())
            e = len(G.edges())

            d = pandas.Series(nx.degree_centrality(G)).mean()
            eg = pandas.Series(nx.eigenvector_centrality(G, max_iter=1000)).mean()

            closeness = {}
            for node in random.sample(G.nodes(),100):
                closeness[node] =  nx.closeness_centrality(G, node)
            c = pandas.Series(closeness).mean()

            b = pandas.Series(nx.betweenness_centrality(G, k=20)).mean()

            stats.append((n,e,d,eg,c,b,cl))
            nx.write_graphml(G, '../sna/all_posts_s%d.graphml'%i)

        print 'nodes, edges, m. degree, m. eigenvector, m. closeness, m. betweeness'
        for s in stats:
            print '%s, %s, %s, %s, %s, %s' % (s[0], s[1], s[2], s[3], s[4], s[5])
	def calculateSimilarities(self):
		"""
		TODO: Break up graph into communities, if a user A
		belongs to a community C, similarities[A] = list of (all other nodes in C, score=1) tuples
		"""

		min_community_size = 5

		# Calculate similarities and populate similarities dict here
		print "Finding CommunitySimilarity! Yay!"
		communities = list(nx.k_clique_communities(self.yelpGraph, min_community_size))
		print "Number of test communities: %d" % len(communities)
		total_size = 0
		for c in communities:
			#print len(c)
			total_size += len(c)
		print "Total size: %d" % total_size

		# for each community
		count = 0
		for community in communities:
			print "We've entered a new community!"
			# for every node in the community
			for user in community:
				count += 1
				self.similarities[user] = list()
				# make a list of all other tuples that are in the community (skip self)
				for friend in community:
					if friend != user:
						self.similarities[user].append((friend, 1))
		print "The count is %d" % count


		print "Number of pairs: %d .... should be equal to 2995" % len(self.similarities)
		# Write similarity map to file
		pickle.dump(self.similarities, open( "communitySim.p", "wb" ) )
Example No. 49
            if not G.has_edge(terms[i],
                              terms[j]):  # add edge if it is not there already

                G.add_edge(terms[i], terms[j])
                G.edge[terms[i]][terms[j]]['freq'] = 1  # the count is 1

            else:
                G.edge[terms[i]][terms[j]][
                    'freq'] += 1  # existing edge, increment the count

f.close()

#remove all edges with a freq less than 3
remove = []
for N1, N2 in G.edges():  # for each edge
    if G.edge[N1][N2]['freq'] < 3:
        remove.append((N1, N2))  # add it to the 'remove' list

G.remove_edges_from(remove)  # filter

#find all maximal cliques
cliques = list(nx.find_cliques(G))
sorted_cliques = sorted(cliques, key=len, reverse=True)  # sort cliques by size
print(sorted_cliques[0])

#find all k-cliques communities
kcliques = list(nx.k_clique_communities(G, 3))
sorted_cliques = sorted(kcliques, key=len,
                        reverse=True)  # sort cliques by size
print(sorted_cliques[0])
Example No. 50
    tupl = sorted_Pr[ii]
    print tupl

#Hits and Authorities
h, a = nx.hits(G_hybrid)

number = 5
print "hubs nodes"

sorted_h = sorted(h.items(), key=operator.itemgetter(1), reverse=True)
for ii in range(number):
    tupl = sorted_h[ii]
    print tupl

number = 5
print "authority nodes"

sorted_a = sorted(a.items(), key=operator.itemgetter(1), reverse=True)
for ii in range(number):
    tupl = sorted_a[ii]
    print tupl

#Cliques
ratioDenominator = 1000
smallestSize = nx.number_of_nodes(G_hybrid) / ratioDenominator

communities = list(nx.k_clique_communities(G_hybrid, smallestSize))

for community in communities:
    print list(community)
for userId in list(submission['UserId']):

    # read graph
    filename = str(userId) + '.egonet'
    G = read_nodeadjlist(egonetFolderName + filename)

    # do not calculate for large graphs (it takes too long)
    if len(G.nodes()) > tooManyNodesThreshold:
        print('skipping user ' + str(userId))
        continue
    else:
        print('predicting for user ' + str(userId))

    # find communities using k_clique_communities()
    listOfCircles = []
    kCliqueComunities = list(nx.k_clique_communities(G,cliqueSize))
    for community in kCliqueComunities:
        # leave only relatively large communities
        if len(community) >= tooLittleFriendsInCircleThreshold:
            listOfCircles.append(list(community))

    # populate prediction string
    predictionString = ''
    for circle in listOfCircles:
        for node in circle:
            predictionString = predictionString + str(node) + ' '
        predictionString = predictionString[:-1] + ';'
    predictionString = predictionString[:-1]

    # if no prediction was created, use 'all friends in one circle'
    if len(listOfCircles) > 0:
Example No. 52
		for c in communities:
			if name in c:
				nodes.append({
					'name':name,
					'group':g
				})
			g = g + 1
	return nodes

G = nx.Graph()
topAuthorNames = [a[0] for a in authorsCounter.most_common(50)] # top 50 authors
G.add_nodes_from(topAuthorNames) # assign nodes, edges are assigned in cooccurrence_links()
collaborators = collaborators_matrix(authors)
cooccurrences = {'nodes':[], 'links':[]}
cooccurrences['links'] = cooccurrence_links(authorsCounter, collaborators)
communities = list(nx.k_clique_communities(G, 3)) # detect communities and then assign groups
cooccurrences['nodes'] = cooccurrence_nodes(authorsCounter, 50, communities)

# dest = '/Users/asif/Sites/scholars/neuromodulation/cooccurrences.json'
# f = open(dest, 'w+')
# f.write(json.dumps(cooccurrences))
# f.close()

###########################################################
# Edge-bindings collaborators
###########################################################

# Edge bindings
def collaborators_bindings(groupCounters, collaborators):
	mostfreq = [x[0] for x in groupCounters.most_common(50)]
	collaborators_bindings = []
def calculate_k_clique(G, K):
    communities = nx.k_clique_communities(G, K)
    if verbose:
        print "k-cliques " + str(K)
    write_csv_groups('./data/results/kClique' + str(K) + '.csv', communities)
Example No. 54
File: main.py Project: azizur77/wcg
	#g.remove_bridges("/home/kazem/weakc/out",0,",")
	g.export_G_to_csv(1)
	#diam_ = nx.algorithms.distance_measures.diameter(g.G)
	#print "diameter of G: %d" %diam_
	
if(0):
	g = WCG(100)
	edge_thr = 1800
	#let's read the contact duration between pairs
	reader = csv.reader(open("/Users/kazemjahanbakhsh/Downloads/graph_cd.txt"), delimiter=',')
	for line in reader:
		if float(line[2]) > edge_thr:
			g.G.add_edge(int(line[0]),int(line[1]),weight=float(line[2]))
	#plot G
	#g.plot_graph(g.G)
	c = list(nx.k_clique_communities(g.G, 4))
	print c
if(0):
    g = WCG(100)
    g.build_csv("/home/kazem/data/FB/facebook-links.txt", 0, "\t")
	#g.findWhiskers("/home/kazem/weakc/out","/home/kazem/weakc/export.csv",0,",")
    #H = g.G.subgraph([60512, 60513, 60514, 60515, 60516, 60517, 60518, 60519, 60520, 60508, 60509, 60510, 60511])
    #H = g.G.subgraph([63008, 63687, 54988, 54989, 54990, 54991, 55065, 55066, 63356, 63357, 63007])
    #H = g.G.subgraph([54380, 54381, 54382, 54383, 54384, 54385, 54386, 54387, 54388, 35581])
    #H = g.G.subgraph([62496, 60099, 60100, 45811, 45812, 45813, 50652, 61054, 61055])
    #H = g.G.subgraph([48870, 48871, 48872, 49773, 49774, 50609, 50610, 50611, 50612])
    H = g.G.subgraph([45958, 45959, 45960, 45961, 45962, 45963, 49551, 49082, 49083, 49084, 49085, 49086, 59970, 59971, 51115, 51116, 62238, 62239, 61477, 55530, 61143, 59418, 59419, 59583, 59055, 59052, 59053, 59054, 41127, 47426, 47427, 47428, 47429, 57631, 49120, 49121, 49122, 54528, 49119, 63321, 59490, 52851, 52852, 63483, 52888, 52889, 52890, 59523, 59522, 54816, 60288, 62493, 62437, 60287, 59792, 59793, 59794, 59790, 59791, 37891, 37892, 15773, 36262, 59569, 60724, 60614, 28727, 61408, 55076, 55075, 29972, 48870, 48871, 48872, 49773, 49774, 50609, 50610, 50611, 50612, 62496, 60099, 60100, 45811, 45812, 45813, 50652, 61054, 61055, 54380, 54381, 54382, 54383, 54384, 54385, 54386, 54387, 54388, 35581, 63008, 63687, 54988, 54989, 54990, 54991, 55065, 55066, 63356, 63357, 63007, 60512, 60513, 60514, 60515, 60516, 60517, 60518, 60519, 60520, 60508, 60509, 60510, 60511, 57953, 57801, 57802, 57946, 53501, 53502, 53503])

if(0):
    g = WCG(2048)
    g.build_csv("/Users/kazemjahanbakhsh/Downloads/facebook-links.txt", 0, "\t")
Example No. 55
def community(G,number):
    c = list(nx.k_clique_communities(G,number))
    return c
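The same thin wrapper written against a current NetworkX release, as a hedged sketch (the function moved out of the top-level namespace in NetworkX 2.0; the karate-club graph is only for illustration):

import networkx as nx
from networkx.algorithms.community import k_clique_communities

def community(G, number):
    """Return the list of k-clique communities of G for k = number."""
    return list(k_clique_communities(G, number))

# Example usage on an illustrative graph:
G = nx.karate_club_graph()
print(community(G, 4))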