def prepare_communities(self):
    """Compute the Louvain dendrogram of ``self.g`` and store it on the instance.

    The ``community`` module is asked for ``generate_dendrogram`` first; when
    that attribute is missing, the ``generate_dendogram`` spelling is used
    instead. The result is stored in ``self.dendrogram``.
    """
    build_dendrogram = getattr(community, 'generate_dendrogram', None)
    if build_dendrogram is None:
        # Fall back to the alternative spelling exposed by some versions.
        build_dendrogram = community.generate_dendogram
    self.dendrogram = build_dendrogram(self.g)
def test_modularity_increase(self):
    """
    Build a dendrogram on a random graph and check that the modularity
    never decreases from one level to the next.
    """
    graph = nx.erdos_renyi_graph(1000, 0.01)
    dendrogram = co.generate_dendogram(graph)

    modularities = []
    for level in range(len(dendrogram)):
        partition = co.partition_at_level(dendrogram, level)
        modularities.append(co.modularity(partition, graph))

    # A non-decreasing sequence is equal to its own sorted copy.
    self.assertListEqual(modularities, sorted(modularities))
def __init__(self, directed_graph):
    """Detect communities in ``directed_graph`` and derive per-community data.

    The graph is converted with ``to_undirected()`` before the dendrogram is
    generated, and the partition of the last dendrogram level is used.
    The results of ``_build_community_graphs`` and ``_pagerank_communities``
    are stored in ``self.community_graphs`` and ``self.community_rankings``.
    """
    self.directed_graph = directed_graph

    undirected = self.directed_graph.to_undirected()
    levels = community.generate_dendogram(undirected)
    top_level = len(levels) - 1
    node_to_community = community.partition_at_level(levels, top_level)

    all_communities = self._get_communities(node_to_community)
    large_communities = self._get_large_communities(all_communities)

    # Only the communities selected by _get_large_communities are passed on as valid.
    self.community_graphs = self._build_community_graphs(
        all_communities, valid_communities=large_communities)
    self.community_rankings = self._pagerank_communities(self.community_graphs)
def Mod(G, usebest=True, l=1):
    """Partition ``G`` with Louvain, tag every node, and return the modularity.

    Community detection runs on ``G.to_undirected()``. Each node of ``G``
    receives the id of its community under the node attribute ``'m'``.

    Parameters:
        G: graph to analyse; it is modified in place (node attribute ``'m'``).
        usebest: when true, use the last level of the dendrogram.
        l: dendrogram level to use when ``usebest`` is false.

    Returns:
        The modularity of the selected partition on the undirected graph.
    """
    D = G.to_undirected()

    # Accept either spelling of the dendrogram builder exposed by ``community``.
    generate = getattr(community, 'generate_dendrogram', None)
    if generate is None:
        generate = community.generate_dendogram
    dendo = generate(D, None)

    level = len(dendo) - 1 if usebest else l
    partition = community.partition_at_level(dendo, level)
    mod = community.modularity(partition, D)

    # ``G.node`` is not available on every networkx version; ``G.nodes``
    # offers the same per-node attribute dictionaries when it is missing.
    node_attrs = G.node if hasattr(G, 'node') else G.nodes
    for n in G:
        node_attrs[n]['m'] = partition[n]
    return mod
def test_nodes_stay_together(self):
    """
    Test that two nodes in the same community at one level stay in the same
    community at the next higher level.
    """
    g = nx.erdos_renyi_graph(500, 0.01)
    dendo = co.generate_dendogram(g)
    parts = {level: co.partition_at_level(dendo, level)
             for level in range(len(dendo))}

    for level in range(len(dendo) - 1):
        lower = parts[level]
        higher = parts[level + 1]
        for com in set(lower.values()):
            # items() works on Python 2 and 3; iteritems() is Python 2 only.
            targets = set(higher[node]
                          for node, node_com in lower.items()
                          if node_com == com)
            # All members of one lower-level community must share a single
            # community at the higher level.
            self.assertEqual(len(targets), 1)
def louvain(adjacency_matrix):
    """
    Performs community embedding using the LOUVAIN method.

    Introduced in: Blondel, V. D., Guillaume, J. L., Lambiotte, R., & Lefebvre, E. (2008).
                   Fast unfolding of communities in large networks.
                   Journal of Statistical Mechanics: Theory and Experiment, 2008(10), P10008.

    Inputs:  - A in R^(nxn): Adjacency matrix of an undirected network represented as a SciPy Sparse COOrdinate matrix.

    Outputs: - X in R^(nxC_n): The latent space embedding represented as a SciPy Sparse COOrdinate matrix.
    """
    # Build an undirected networkx graph from the sparse matrix.
    graph = nx.from_scipy_sparse_matrix(adjacency_matrix, create_using=nx.Graph())

    # Hierarchy of communities produced by the Louvain algorithm.
    dendrogram = community.generate_dendogram(graph, part_init=None)

    # One (node, feature) pair per node and per level; the feature ids of each
    # level are shifted past those of all previous levels.
    node_ids = []
    feature_ids = []
    offset = 0
    for level in range(len(dendrogram)):
        partition = community.partition_at_level(dendrogram, level)
        for node, label in partition.items():
            node_ids.append(node)
            feature_ids.append(offset + label)
        offset += max(partition.values()) + 1

    row = np.array(node_ids)
    col = np.array(feature_ids)
    data = np.ones(row.size, dtype=np.float64)

    # The row count is taken from the partition of the last level processed.
    number_of_nodes = len(partition.keys())
    louvain_features = sparse.coo_matrix((data, (row, col)),
                                         shape=(number_of_nodes, offset),
                                         dtype=np.float64)
    return louvain_features
def louvain(adjacency_matrix):
    """
    Performs community embedding using the LOUVAIN method.

    Introduced in: Blondel, V. D., Guillaume, J. L., Lambiotte, R., & Lefebvre, E. (2008).
                   Fast unfolding of communities in large networks.
                   Journal of Statistical Mechanics: Theory and Experiment, 2008(10), P10008.

    Inputs:  - A in R^(nxn): Adjacency matrix of an undirected network represented as a SciPy Sparse COOrdinate matrix.

    Outputs: - X in R^(nxC_n): The latent space embedding represented as a SciPy Sparse COOrdinate matrix.
    """
    # Undirected networkx view of the input matrix.
    network = nx.from_scipy_sparse_matrix(adjacency_matrix, create_using=nx.Graph())

    # Run LOUVAIN to obtain the hierarchy of communities.
    hierarchy = community.generate_dendogram(network, part_init=None)

    rows = []
    cols = []
    next_free_column = 0
    for depth in range(len(hierarchy)):
        partition = community.partition_at_level(hierarchy, depth)
        # keys() and values() of the same unmodified dict iterate in matching order.
        rows.extend(partition.keys())
        cols.extend(next_free_column + label for label in partition.values())
        # Reserve a separate block of columns for each level.
        next_free_column += max(partition.values()) + 1

    row = np.array(rows)
    col = np.array(cols)
    data = np.ones(row.size, dtype=np.float64)

    embedding_shape = (len(partition.keys()), next_free_column)
    louvain_features = sparse.coo_matrix((data, (row, col)),
                                         shape=embedding_shape,
                                         dtype=np.float64)
    return louvain_features
def predict(self):
    """Predict using community structure

    If two nodes belong to the same community, they are predicted to form
    a link. This uses the Louvain algorithm, which determines communities
    at different granularity levels: the finer grained the community, the
    higher the resulting score.

    You'll need to install Thomas Aynaud's python-louvain package from
    https://bitbucket.org/taynaud/python-louvain for this.

    """
    try:
        from community import generate_dendogram, partition_at_level
    except ImportError:
        raise ImportError("Module 'community' could not be found. "
                          "Please install python-louvain from "
                          "https://bitbucket.org/taynaud/python-louvain")
    from collections import defaultdict

    scores = Scoresheet()
    levels = generate_dendogram(self.G)
    depth = len(levels)

    for level in range(depth):
        # Group the nodes of this level by community label.
        members = defaultdict(list)
        for node, label in six.iteritems(partition_at_level(levels, level)):
            members[label].append(node)

        # Lower levels get the larger weight.
        level_weight = depth - level
        for group in six.itervalues(members):
            for u, v in all_pairs(group):
                if self.eligible(u, v):
                    scores[(u, v)] += level_weight
    return scores
#... define BC network if verbose: print "....define graph in networkx format" G=nx.Graph() for i in CC_table: for j in CC_table[i]: if ((not ref_journal_flag) or (ref_journal_flag and ref_index[i]['journal'] in ref_journal_list and ref_index[j]['journal'] in ref_journal_list)) and (CC_table[i][j]>=thr): w_ij = (1.0 * CC_table[i][j]) / math.sqrt(nA[i] * nA[j]) G.add_edge(i, j, weight=w_ij) nx.draw_spring(G) dst = os.path.join(out_dir, 'CC-Network(ccthr=%d, thr=%d, ref_journal_flag=%s).png' % (ccthr, thr, ref_journal_flag)) plt.savefig(dst) plt.close('all') #... if verbose: print "....computing communities with Louvain algo" dendogram = community.generate_dendogram(G, part_init=None) #... output infos print "....There are %d references in the database (contain duplicates)" % (nb_total_refs) print "....There are %d references in the database (contain no duplicate)" % (nb_refs) print "....There are %d references in the CC network\n......(ie sharing at least %d article(s) with another reference)" % (len(G.nodes()), ccthr) for level in range(len(dendogram)): part = community.partition_at_level(dendogram, level) mod = community.modularity(part, G) nb_comm = len(set(part.values())) size_sup10 = 0; size_sup100 = 0; #communities_caracteristics(partition, thr, level) for com in set(part.values()) : list_nodes = [nodes for nodes in part.keys() if part[nodes] == com] if len(list_nodes) > 100: size_sup100 += 1 if len(list_nodes) > 10: size_sup10 += 1 print "....level %d: %d communities [%d with size > 10, %d with size > 100], modularity Q=%1.6f" % (level, nb_comm, size_sup10, size_sup100, mod)
# Copyright (c) 2010 Howard Hughes Medical Institute. # All rights reserved. # Use is subject to Janelia Farm Research Campus Software Copyright 1.1 license terms. # http://license.janelia.org/license/jfrc_copyright_1_1.html """ A script to demonstrate community detection. Uses the community module bundled with Neuroptikon and written by Thomas Aynaud <http://perso.crans.org/aynaud/communities/>. """ import community updateProgress(gettext('Finding communities...'), forceDisplay = True) dendogram = community.generate_dendogram(network.simplifiedGraph()) updateProgress(gettext('Finding communities...')) partition = community.best_partition(dendogram) if any(partition): updateProgress(gettext('Isolating communities...')) for visibles in list(display.visibles.itervalues()): for visible in visibles: if visible.isPath() and not isinstance(visible.client, Stimulus): startCommunity, endCommunity = [partition[node.client.networkId] for node in visible.pathEndPoints()] if startCommunity != endCommunity: display.removeVisible(visible) updateProgress(gettext('Visually separating communities...')) display.setViewDimensions(2) for obj in network.objects: display.setVisiblePosition(obj, fixed = False) try:
link_list, jfile, ) evt += 1 # can't be anoated if evt > 0: break print 'fnum ' + str(fnum) uids = cntuids.keys() # id of all users pnum = len(uids) print 'pnum ' + str(pnum) matrix = [{} for i in xrange(pnum)] layout_matrix = [{} for i in xrange(pnum)] G = build_G(pnum, uids, matrix, layout_matrix) dendo = cm.generate_dendogram(G) partition = cm.partition_at_level(dendo, len(dendo) - 1) layout_G = build_layout_G(layout_matrix) print 'nx.spring_layout' begin = time.time() pos = nx.spring_layout(layout_G, iterations=30) # default 50 print 'pos ' + (str(time.time() - begin)) uid_loc = {} uid_cls = {} cat_cnt = [0 for i in xrange(cnum)] group = {} for i in xrange(len(uids)): cat_cnt[partition[i]] += 1 uid_loc[uids[i]] = [float(pos[i][0]), float(pos[i][1])]
#best partition calculation cluster2 = community.best_partition(graph) #print run time for c1 runtimeC2 = timeit.default_timer() - startC2 mod2 = community.modularity(cluster2, graph) graph = nx.read_edgelist("Data/a.data") #timer start c1 startC3 = timeit.default_timer() tmp = community.generate_dendogram(graph) cluster3 = community.partition_at_level(tmp, 0) #print run time for c1 runtimeC3 = timeit.default_timer() - startC3 mod3 = community.modularity(cluster3, graph) print "modularity: 1:%f; 2:%f; 3:%f" % (mod1,mod2, mod3) nmi1 = calculate_NMI(cluster1, cluster2) print "nmi between cluster1 and cluster 2: %.10f" % nmi1 nmi2 = calculate_NMI(cluster1, cluster3) print "nmi between cluster1 and cluster 3: %.10f" % nmi2 nmi3 = calculate_NMI(cluster2, cluster3) print "nmi between cluster2 and cluster 3: %.10f" % nmi3
# Average clustering coefficient. The value recorded under "clustering" is
# the coefficient computed here (previously an unrelated variable was stored
# and ``clust`` was never used).
clust = nx.average_clustering(net)
add("clustering", clust)

# Eccentricity-based measures are computed on the largest connected component.
# NOTE(review): nx.connected_component_subgraphs is not available in every
# networkx version -- confirm the version this script targets.
mcc = max(nx.connected_component_subgraphs(net), key=len)
ecc = nx.eccentricity(mcc)
min_ecc = min(ecc.values())
# Nodes whose eccentricity is within 1 of the minimum.
size_center = sum(1 for node in mcc if ecc[node] <= min_ecc + 1)
diameter = max(ecc.values())
ed = effective_diameter(net)
add("diameter", diameter)
add("effective_diameter", ed)
add("size_center", size_center)

# Communities: the first dendrogram entry is used directly as the partition.
dendo = com.generate_dendogram(net)
dic_com_nodes = defaultdict(list)
# items() works on Python 2 and 3; iteritems() is Python 2 only.
for node, community in dendo[0].items():
    dic_com_nodes[community].append(node)
mod = com.modularity(dendo[0], net)
add("nb_communities", len(dic_com_nodes))
add("modularity", mod)

# Motif analysis works on files derived from the input path.
path_to_file = file.replace(".gexf", "")
write_edgelist(path_to_file)
write_motifs(path_to_file)
for motif, score in analyse_motifs(path_to_file):
    add("motif_" + str(motif), score)

if not os.path.exists("results_a/"):
    os.makedirs("results_a/")
def print_dendrogram(self):
    """Print the partition of ``self.G`` at each dendrogram level except the last.

    One line is written to standard output per level, in the form
    ``partition at level <level> is <partition>``.
    """
    dendo = community.generate_dendogram(self.G)
    # NOTE(review): the loop stops before the last level (len(dendo) - 1),
    # so the top-level partition is never printed -- confirm this is intended.
    for level in range(len(dendo) - 1):
        part = community.partition_at_level(dendo, level)
        # A single pre-formatted argument prints the same text under both the
        # Python 2 print statement and the Python 3 print function.
        print("partition at level %s is %s" % (level, part))
def BC_network(in_dir, out_dir, verbose):
    """Build the bibliographic-coupling (BC) network, detect communities and export them.

    Reads "articles.dat" and "references.dat" from ``in_dir``, links articles
    that share references, runs the Louvain algorithm on the weighted graph,
    and asks the user interactively for the link threshold, the dendrogram
    level and the minimum community size. It then writes into ``out_dir``:
    a LaTeX file of community "ID cards", a gephi .gdf file of the network
    between communities and, if the user confirms, a .gdf file of the network
    between articles.

    Parameters:
        in_dir: directory containing "articles.dat" and "references.dat".
        out_dir: directory receiving the .tex and .gdf files.
        verbose: when true, progress messages are printed.

    Returns:
        None.
    """
    ## INPUT DATA
    if verbose: print "..Initialize"
    src1 = os.path.join(in_dir, "articles.dat")
    src5 = os.path.join(in_dir, "references.dat")
    # Ymin / Ymax are updated below but not read again in this function.
    Ymin = 2100
    Ymax = 1900  # store the min and max publication year
    nR = dict()  # store the number of refs of the articles
    pl = Utils.Article()
    pl.read_file(src1)
    nb_art = len(pl.articles)  # store the number of articles within database
    for l in pl.articles:
        nR[l.id] = 0
        if (l.year > 1900 and l.year < 2100):
            if (l.year > Ymax): Ymax = l.year
            if (l.year < Ymin): Ymin = l.year

    ## CREATE BC WEIGHT TABLE
    if verbose: print "..Create the 'Bibliographic Coupling' weight table"
    ref_table = dict()  # store the id of articles using a given ref
    BC_table = dict()  # store the number of common refs between pairs of articles
    if verbose: print "....loading refs table"
    pl = Utils.Ref()
    pl.read_file(src5)
    for l in pl.refs:
        # A reference is identified by the concatenation of its fields.
        foo = l.firstAU + ', ' + str(l.year) + ', ' + l.journal + ', ' + l.volume + ', ' + l.page
        if foo in ref_table: ref_table[foo].append(l.id)
        else: ref_table[foo] = [l.id]
        nR[l.id] += 1
    if verbose: print "....detecting common references"
    for foo in ref_table:
        if len(ref_table[foo]) > 1:
            # Every ordered pair i < j of articles citing this reference
            # gets one more common reference.
            for i in ref_table[foo]:
                for j in ref_table[foo]:
                    if i < j:
                        if i not in BC_table: BC_table[i] = dict()
                        if j not in BC_table[i]: BC_table[i][j] = 0
                        BC_table[i][j] += 1

    # The string literal below is disabled export code kept as a bare
    # expression statement; it has no effect at run time.
    """ ## EXPORT BC WEIGHT TABLE if verbose: print "Output the BC_weight table" filename = os.path.join(in_dir, "BCweight.txt") f_BC = open(filename,'w') for i in BC_table: for j in BC_table[i]: w_ij = (1.0 * BC_table[i][j]) / math.sqrt(nR[i] * nR[j]) #f_BC.write("%d\t%d\t%f\t%d\n" % (i, j, w_ij, BC_table[i][j]) ) if i > j: f_BC.write("%d %d %1.7f\n" % (i, j, w_ij) ) f_BC.close() """

    # choose threshold
    confirm = 'n'
    thr = 1
    while confirm != 'y':
        if thr == 1: print "Keep BC links between articles sharing at least %d reference" % (thr)
        else: print "Keep BC links between articles sharing at least %d references" % (thr)
        confirm = raw_input("Confirm (y/n): ")
        while confirm not in ['n', 'y']:
            confirm = raw_input("...typing error!\n Confirm (y/n): ")
        if confirm == 'n':
            # NOTE(review): under Python 2, input() evaluates what the user types.
            thr = input("threshold for BC links -- articles should be share at least ? references:")
    # Keep the link threshold: thr is reused below for the community size.
    bcthr = thr

    ##############################
    ## BC COMMUNITIES
    if verbose: print "..BC communities"
    #... define BC network
    if verbose: print "....define graph in networkx format"
    G = nx.Graph()
    for i in BC_table:
        for j in BC_table[i]:
            if BC_table[i][j] >= thr:
                # Shared references normalised by the geometric mean of the
                # two reference counts.
                w_ij = (1.0 * BC_table[i][j]) / math.sqrt(nR[i] * nR[j])
                G.add_edge(i, j, weight=w_ij)
    #...
    if verbose: print "....computing communities with Louvain algo"
    dendogram = community.generate_dendogram(G, part_init=None)
    #... output infos
    print "....There are %d articles in the database" % (nb_art)
    print "....There are %d articles in the BC network\n......(ie sharing at least one reference with another article)" % (len(G.nodes()))
    for level in range(len(dendogram)):
        part = community.partition_at_level(dendogram, level)
        mod = community.modularity(part, G)
        nb_comm = len(set(part.values()))
        size_sup10 = 0
        size_sup100 = 0
        #communities_caracteristics(partition, thr, level)
        for com in set(part.values()):
            list_nodes = [nodes for nodes in part.keys() if part[nodes] == com]
            if len(list_nodes) > 100: size_sup100 += 1
            if len(list_nodes) > 10: size_sup10 += 1
        print "....level %d: %d communities [%d with size > 10, %d with size > 100], modularity Q=%1.6f" % (level, nb_comm, size_sup10, size_sup100, mod)

    ##############################
    ## WHICH EXTRACTION ?
    print "..BC communities extraction"
    #
    # Defaults: last dendrogram level, communities of more than 10 articles.
    confirm = 'n'
    level = len(dendogram) - 1
    thr = 10
    while confirm != 'y':
        part = community.partition_at_level(dendogram, level)
        nb_comm = len(set(part.values()))
        size_sup_thr = 0
        n_sup_thr = 0
        for com in set(part.values()):
            list_nodes = [nodes for nodes in part.keys() if part[nodes] == com]
            if len(list_nodes) > thr:
                n_sup_thr += len(list_nodes)
                size_sup_thr += 1
        print "....Extraction of level %d BC communities with size > %d\n......(%d articles gathered in %d communities):" % (level, thr, n_sup_thr, size_sup_thr)
        confirm = raw_input("....do you confirm? (y/n): ")
        if confirm == 'n':
            # NOTE(review): the entered level is not checked against len(dendogram).
            level = input("......level you want to extract:")
            thr = input("......keep communities of size > to:")

    #... partition
    partition = community.partition_at_level(dendogram, level)
    # list_nodes maps each community id to the list of its article ids.
    list_nodes = dict()
    for com in set(partition.values()):
        list_nodes[com] = [nodes for nodes in partition.keys() if partition[nodes] == com]

    ##############################
    ## COMMUNITIES CARACTERISTICS
    if verbose: print "..Computing communities caracteristics"
    #.. ini
    filename = os.path.join(out_dir, "BCcomm_ID_Cards(bcthr=%d, thr=%d).tex" % (bcthr, thr))
    # NOTE(review): f_out is closed explicitly further down; an exception in
    # between leaves it open.
    f_out = open(filename, "w")
    f_out.write("\documentclass[a4paper,11pt]{report}\n\usepackage[english]{babel}\n\usepackage[latin1]{inputenc}\n\usepackage{amsfonts,amssymb,amsmath}\n\usepackage{pdflscape}\n\usepackage{color}\n\n\\addtolength{\evensidemargin}{-60pt}\n\\addtolength{\oddsidemargin}{-60pt}\n\\addtolength{\\textheight}{80pt}\n\n\\title{{\\bf Communities ID Cards}}\n\date{\\begin{flushleft}This document gather the ``ID Cards'' of the BC communities found within your database.\\\\\n The BC network was built by keeping a link between articles sharing at least %d references. The communities characterized here correspond to the ones found in the level %d (in the sense of the Louvain algo) which gathers more than %d articles.\\\\\n These ID cards displays the most frequent keywords, subject categories, journals of publication, institution, countries, authors, references and reference journals of the articles of each community. The significance of an item $\sigma = \sqrt{N} (f - p) / \sqrt{p(1-p)}$ [where $N$ is the number of articles within the community and $f$ and $p$ are the proportion of articles respectively within the community and within the database displaying that item ] is also given (for example $\sigma > 5$ is really highly significant).\\\\\n\\vspace{1cm}\n\copyright Sebastian Grauwin, Liu Weizhi - (2014) \end{flushleft}}\n\n\\begin{document}\n\\begin{landscape}\n\maketitle\n" % (bcthr, level, thr))

    #.. quantitative
    comm_innerw = dict()
    comm_size = dict()
    for com in list_nodes:
        size = len(list_nodes[com])
        W = 0
        for id1 in list_nodes[com]:
            for id2 in list_nodes[com]:
                if id2 > id1 and id2 in G.edge[id1]:
                    W += G.edge[id1][id2]['weight']
        # Average weight over the size * (size - 1) / 2 possible pairs;
        # comm_innerw stores the inverse of that average.
        # NOTE(review): size == 1 or W == 0 would raise ZeroDivisionError here.
        W *= 2.0 / (size * (size - 1))
        comm_innerw[com] = 1.0 / W
        comm_size[com] = size

    # Python 2: items() returns a list, sorted in place with the cmpval comparator.
    Lcomm_size = comm_size.items()
    Lcomm_size.sort(cmpval)

    #.. frequency / significance of keywords, etc...
    comm_label = dict()
    (stuffK, stuffS, stuffJ, stuffA, stuffI, stuffC, stuffR, stuffRJ) = BCUtils.comm_tables(in_dir, partition, thr, verbose)

    #.. output tables
    # Each stuffX[com][i] entry is used as (label, f, sigma). Labels that are
    # too long are split at a space and continued on the next table row.
    for elm in Lcomm_size:
        if elm[1] > thr:
            com = elm[0]
            #K
            if com in stuffK:
                # The community label is its first keyword.
                if len(stuffK[com]) > 0: comm_label[com] = stuffK[com][0][0]
                else: comm_label[com] = 'XXXX'
                f_out.write("\clearpage\n\n\\begin{table}[!ht]\n\caption{The community ``%s'' contains $N = %d$ articles. Its average internal link weight is $<\omega_{in}> \simeq 1/%d$ }\n\\textcolor{white}{aa}\\\\\n{\scriptsize\\begin{tabular}{|l r r|}\n\hline\nKeyword & f(\\%%) & $\sigma$\\\\\n\hline\n" % (comm_label[com], comm_size[com], comm_innerw[com]))
                for i in range(len(stuffK[com])):
                    if len(stuffK[com][i][0]) < 30:
                        f_out.write("%s & %1.2f & %1.2f\\\\\n" % (stuffK[com][i][0], stuffK[com][i][1], stuffK[com][i][2]))
                    else:
                        aux = stuffK[com][i][0].rfind(' ')
                        while aux > 30:
                            aux = stuffK[com][i][0][0:aux].rfind(' ')
                        f_out.write("%s & & \\\\\n" % (stuffK[com][i][0][0:aux]))
                        f_out.write("$\quad$%s & %1.2f & %1.2f\\\\\n" % (stuffK[com][i][0][aux:], stuffK[com][i][1], stuffK[com][i][2]))
                # Pad the section with empty rows up to 20 entries.
                for i in range(max(0, 20 - len(stuffK[com]))):
                    f_out.write(" & & \\\\\n")
            else:
                # NOTE(review): comm_label[com] is not set on this branch, but the
                # gephi node export below reads comm_label[com] for every
                # community larger than thr -- confirm this cannot raise KeyError.
                f_out.write("\clearpage\n\n\\begin{table}[!ht]\n\caption{The community ``?'' contains $N = %d$ articles. Its average internal link weight is $<\omega_{in}> \simeq 1/%d$ }\n\\textcolor{white}{aa}\\\\\n{\scriptsize\\begin{tabular}{|l r r|}\n\hline\nKeyword & f(\\%%) & $\sigma$\\\\\n\hline\n" % (comm_size[com], comm_innerw[com]))
                for i in range(20):
                    f_out.write(" & & \\\\\n")
            #S
            f_out.write("\hline\n\hline\nSubject & f(\\%) & $\sigma$\\\\\n\hline\n")
            if com in stuffS:
                for i in range(len(stuffS[com])):
                    f_out.write("%s & %1.2f & %1.2f\\\\\n" % (stuffS[com][i][0], stuffS[com][i][1], stuffS[com][i][2]))
                for i in range(max(0, 10 - len(stuffS[com]))):
                    f_out.write(" & & \\\\\n")
            else:
                for i in range(10):
                    f_out.write(" & & \\\\\n")
            #J
            f_out.write("\hline\n\hline\nJournal & f(\\%) & $\sigma$\\\\\n\hline\n")
            if com in stuffJ:
                for i in range(len(stuffJ[com])):
                    f_out.write("%s & %1.2f & %1.2f\\\\\n" % (stuffJ[com][i][0], stuffJ[com][i][1], stuffJ[com][i][2]))
                for i in range(max(0, 10 - len(stuffJ[com]))):
                    f_out.write(" & & \\\\\n")
            else:
                for i in range(10):
                    f_out.write(" & & \\\\\n")
            f_out.write("\hline\n\end{tabular}\n}\n")
            #f_out.write("\hline\n\end{tabular}\n}\n\end{table}\n\n")
            #I
            f_out.write("{\scriptsize\\begin{tabular}{|l r r|}\n\hline\nInstitution & f(\\%) & $\sigma$\\\\\n\hline\n")
            if com in stuffI:
                for i in range(len(stuffI[com])):
                    if len(stuffI[com][i][0]) < 30:
                        f_out.write("%s & %1.2f & %1.2f\\\\\n" % (stuffI[com][i][0], stuffI[com][i][1], stuffI[com][i][2]))
                    else:
                        aux = stuffI[com][i][0].rfind(' ')
                        while aux > 30:
                            aux = stuffI[com][i][0][0:aux].rfind(' ')
                        f_out.write("%s & & \\\\\n" % (stuffI[com][i][0][0:aux]))
                        f_out.write("$\quad$%s & %1.2f & %1.2f\\\\\n" % (stuffI[com][i][0][aux:], stuffI[com][i][1], stuffI[com][i][2]))
                for i in range(max(0, 20 - len(stuffI[com]))):
                    f_out.write(" & & \\\\\n")
            else:
                for i in range(20):
                    f_out.write(" & & \\\\\n")
            #C
            f_out.write("\hline\n\hline\nCountry & f(\\%) & $\sigma$\\\\\n\hline\n")
            if com in stuffC:
                for i in range(len(stuffC[com])):
                    f_out.write("%s & %1.2f & %1.2f\\\\\n" % (stuffC[com][i][0], stuffC[com][i][1], stuffC[com][i][2]))
                for i in range(max(0, 10 - len(stuffC[com]))):
                    f_out.write(" & & \\\\\n")
            else:
                for i in range(10):
                    f_out.write(" & & \\\\\n")
            #A
            f_out.write("\hline\n\hline\nAuthor & f(\\%) & $\sigma$\\\\\n\hline\n")
            if com in stuffA:
                for i in range(len(stuffA[com])):
                    f_out.write("%s & %1.2f & %1.2f\\\\\n" % (stuffA[com][i][0], stuffA[com][i][1], stuffA[com][i][2]))
                for i in range(max(0, 10 - len(stuffA[com]))):
                    f_out.write(" & & \\\\\n")
            else:
                for i in range(10):
                    f_out.write(" & & \\\\\n")
            f_out.write("\hline\n\end{tabular}\n}\n")
            #R
            f_out.write("{\scriptsize\\begin{tabular}{|l r r|}\n\hline\nReference & f(\\%) & $\sigma$\\\\\n\hline\n")
            if com in stuffR:
                for i in range(len(stuffR[com])):
                    if len(stuffR[com][i][0]) < 50:
                        f_out.write("%s & %1.2f & %1.2f\\\\\n" % (stuffR[com][i][0], stuffR[com][i][1], stuffR[com][i][2]))
                    elif len(stuffR[com][i][0]) < 90:
                        # Split into two rows.
                        aux = stuffR[com][i][0].rfind(' ')
                        while aux > 50:
                            aux = stuffR[com][i][0][0:aux].rfind(' ')
                        f_out.write("%s & & \\\\\n" % (stuffR[com][i][0][0:aux]))
                        f_out.write("$\quad$%s & %1.2f & %1.2f\\\\\n" % (stuffR[com][i][0][aux:], stuffR[com][i][1], stuffR[com][i][2]))
                    else:
                        # Split into three rows.
                        aux1 = stuffR[com][i][0].rfind(' ')
                        while aux1 > 90:
                            aux1 = stuffR[com][i][0][0:aux1].rfind(' ')
                        aux2 = stuffR[com][i][0][0:aux1].rfind(' ')
                        while aux2 > 50:
                            aux2 = stuffR[com][i][0][0:aux2].rfind(' ')
                        f_out.write("%s & & \\\\\n" % (stuffR[com][i][0][0:aux2]))
                        f_out.write("$\quad$%s & & \\\\\n" % (stuffR[com][i][0][aux2:aux1]))
                        f_out.write("$\quad$%s & %1.2f & %1.2f\\\\\n" % (stuffR[com][i][0][aux1:], stuffR[com][i][1], stuffR[com][i][2]))
                for i in range(max(0, 25 - len(stuffR[com]))):
                    f_out.write(" & & \\\\\n")
            else:
                for i in range(25):
                    f_out.write(" & & \\\\\n")
            #RJ
            f_out.write("\hline\n\hline\nRefJournal & f(\\%) & $\sigma$\\\\\n\hline\n")
            if com in stuffRJ:
                for i in range(len(stuffRJ[com])):
                    if len(stuffRJ[com][i][0]) < 50:
                        f_out.write("%s & %1.2f & %1.2f\\\\\n" % (stuffRJ[com][i][0], stuffRJ[com][i][1], stuffRJ[com][i][2]))
                    else:
                        # NOTE(review): unlike the other sections, the f and sigma
                        # values are not written for a split RefJournal label.
                        aux = stuffRJ[com][i][0].rfind(' ')
                        while aux > 50:
                            aux = stuffRJ[com][i][0][0:aux].rfind(' ')
                        f_out.write("%s & & \\\\\n" % (stuffRJ[com][i][0][0:aux]))
                        f_out.write("$\quad$%s & & \\\\\n" % (stuffRJ[com][i][0][aux:]))
                for i in range(max(0, 10 - len(stuffRJ[com]))):
                    f_out.write(" & & \\\\\n")
            else:
                for i in range(10):
                    f_out.write(" & & \\\\\n")
            f_out.write("\hline\n\end{tabular}\n}\n\end{table}\n\n")

    #.. end
    f_out.write("\end{landscape}\n\n\end{document}\n")
    f_out.close()
    if verbose: print "..Communities caracteristics extracted in .tex 'IDCards' file"

    ##############################
    ## OUTPUT GEPHI FILES
    #... output gephi
    if verbose: print "..Preparing gephi gdf file for BC communities network"
    ## ... ini
    name = "BC_comm_level%d(bcthr=%d, thr=%d).gdf" % (level, bcthr, thr)
    dst = os.path.join(out_dir, name)
    f_gephi = open(dst, 'w')
    ## ... prep nodes
    # One node per community larger than thr.
    if verbose: print "....nodes"
    f_gephi.write("nodedef>name VARCHAR,label VARCHAR,size DOUBLE,inv_innerweight DOUBLE\n")
    for com in comm_size:
        if comm_size[com] > thr:
            f_gephi.write("%d,'%s',%d,%1.0f\n" % (com, comm_label[com], comm_size[com], comm_innerw[com]))
    ## ... prep edges
    # One edge per pair of kept communities: the summed weight of the links
    # between them, scaled by 1000 / (size1 * size2).
    if verbose: print "....edges"
    f_gephi.write("edgedef>node1 VARCHAR,node2 VARCHAR,weight DOUBLE,logweight DOUBLE\n")
    for com1 in list_nodes:
        for com2 in list_nodes:
            size1 = len(list_nodes[com1])
            size2 = len(list_nodes[com2])
            if size1 > thr and size2 > thr and com1 > com2:
                W = 0
                for id1 in list_nodes[com1]:
                    for id2 in list_nodes[com2]:
                        if id2 in G.edge[id1]:
                            W += G.edge[id1][id2]['weight']
                W *= 1000.0 / (size1 * size2)
                if W > 0.000001:
                    # logweight is 6 + log10(W).
                    f_gephi.write("%d,%d,%1.9f,%1.2f\n" % (com1, com2, W, 6 + math.log(W) / math.log(10)))
    ## ... end
    f_gephi.close()
    if verbose: print "..Done!\n"

    ##
    ##
    ##... output the BC networks?
    confirm = raw_input("..There are %d articles in the BC network.\n....do you want to create a gephi file with the BC networks at the articles level? (y/n): " % (len(G.nodes())))
    if confirm == 'y':
        ## ... ini
        name = "BCnetwork(bcthr=%d, thr=%d).gdf" % (bcthr, thr)
        dst = os.path.join(out_dir, name)
        f_gephi = open(dst, 'w')
        ## ... prep nodes
        # One node per article that belongs to a community larger than thr.
        if verbose: print "....nodes"
        f_gephi.write("nodedef>name VARCHAR,label VARCHAR,BCcom VARCHAR,firstAU VARCHAR,journal VARCHAR,year VARCHAR,nb_refs DOUBLE\n")
        pl = Utils.Article()
        pl.read_file(src1)
        for l in pl.articles:
            if l.id in partition:
                BCcom = partition[l.id]
                if comm_size[BCcom] > thr:
                    foo = l.firstAU + ', ' + l.journal + ', ' + str(l.year)
                    f_gephi.write("%d,'%s',%s,%s,%s,%d,%d\n" % (l.id, foo, str(BCcom), l.firstAU, l.journal, l.year, nR[l.id]))
        ## ... prep edges
        # The newline is written before each edge row, so the header carries none.
        if verbose: print "....edges"
        f_gephi.write("edgedef>node1 VARCHAR,node2 VARCHAR,weight DOUBLE,nb_comm_refs DOUBLE")
        for i in BC_table:
            for j in BC_table[i]:
                if (i < j) and (i in partition) and (j in partition):
                    comi_size = comm_size[partition[i]]
                    comj_size = comm_size[partition[j]]
                    if (comi_size > thr) and (comj_size > thr):
                        w_ij = (1.0 * BC_table[i][j]) / math.sqrt(nR[i] * nR[j])
                        f_gephi.write("\n%d,%d,%f,%d" % (i, j, w_ij, BC_table[i][j]))
        ## ... end
        f_gephi.close()
        if verbose: print "..Done!\n"

    ##
    ###################################
    ## END
    return
# Copyright (c) 2010 Howard Hughes Medical Institute. # All rights reserved. # Use is subject to Janelia Farm Research Campus Software Copyright 1.1 license terms. # http://license.janelia.org/license/jfrc_copyright_1_1.html """ A script to demonstrate community detection. Uses the community module bundled with Neuroptikon and written by Thomas Aynaud <http://perso.crans.org/aynaud/communities/>. """ import community updateProgress(gettext('Finding communities...'), forceDisplay=True) dendogram = community.generate_dendogram(network.simplifiedGraph()) updateProgress(gettext('Finding communities...')) partition = community.best_partition(dendogram) if any(partition): updateProgress(gettext('Isolating communities...')) for visibles in list(display.visibles.itervalues()): for visible in visibles: if visible.isPath() and not isinstance(visible.client, Stimulus): startCommunity, endCommunity = [ partition[node.client.networkId] for node in visible.pathEndPoints() ] if startCommunity != endCommunity: display.removeVisible(visible) updateProgress(gettext('Visually separating communities...')) display.setViewDimensions(2) for obj in network.objects:
import networkx as nx
import community
from stratagies import *
import Queue

# Load the comma-delimited edge list.
G = nx.read_edgelist("nets/"+"GTCom-lj (4833).ungraph.txt", delimiter=",")

# Helper provided by the star import above.
split_biggest_comm(G, 689, 1)

# Print the dendrogram computed on G.
x = community.generate_dendogram(G)
print(x)
import json
from util.read_utils import lines_per_n
import community
import networkx as nx

# Directed graph with one edge from the sender of a message to each recipient.
author_graph = nx.DiGraph()

# The with-statement closes the file, so no explicit close() is needed.
with open('clean_data.json', 'r') as jfile:
    # Each chunk produced by lines_per_n(jfile, 9) is parsed as one JSON object.
    for chunk in lines_per_n(jfile, 9):
        hdr_data = json.loads(chunk)
        sender = str(hdr_data['From'])
        # "To" recipients are linked with solid edges.
        for to_addr in str(hdr_data['To']).split(","):
            if '@' in to_addr:
                author_graph.add_edge(sender, to_addr.strip(), style='solid', label=hdr_data['Time'])
        # "Cc" recipients are linked with dashed edges. The '@' filter is
        # applied to the Cc address itself (previously it re-tested the last
        # "To" address).
        for cc_addr in str(hdr_data['Cc']).split(","):
            if '@' in cc_addr:
                author_graph.add_edge(sender, cc_addr.strip(), style='dashed', label=hdr_data['Time'])

print("No. of Weakly Connected Components:", nx.number_weakly_connected_components(author_graph))
print("No. of Strongly Connected Components:", nx.number_strongly_connected_components(author_graph))
print("Nodes:", nx.number_of_nodes(author_graph))
print("Edges:", nx.number_of_edges(author_graph))

# The following lines generate a dendrogram for the undirected version of the
# graph and print the partition found at every level.
dendo = community.generate_dendogram(author_graph.to_undirected())
for level in range(len(dendo)):
    print("Partition at level", level, "is", community.partition_at_level(dendo, level))
    print("-"*10)
def CC_network(in_dir, out_dir, verbose): ## INPUT DATA if verbose: print "..Initialize" src1 = os.path.join(in_dir, "articles.dat") src5 = os.path.join(in_dir, "references.dat") Ymin = 2100 Ymax = 1900 pl = Utils.Article() pl.read_file(src1) nb_art = len(pl.articles) art_table = dict() for i in range(nb_art): art_table[i] = [] doc_table = dict() id = 0 for l in pl.articles: doc_table[id] = dict() doc_table[id]['firstAU'] = l.firstAU doc_table[id]['year'] = l.year doc_table[id]['journal'] = l.journal doc_table[id]['citation'] = l.times_cited doc_table[id]['title'] = l.title doc_table[id]['de_keywords'] = l.de_keywords doc_table[id]['id_keywords'] = l.id_keywords doc_table[id]['abstract'] = l.abstract id = id + 1 for l in pl.articles: if (l.year > 1900 and l.year < 2000): if (l.year > Ymax): Ymax = l.year if (l.year < Ymin): Ymin = l.year if verbose: print "..Create Co-citation Network weight table" if verbose: print "....loading refs table" pl = Utils.Ref() pl.read_file(src5) nb_total_refs = len(pl.refs) CC_table = dict() nA = dict() ref_index = dict() for l in pl.refs: foo = l.firstAU + ', ' + str( l.year) + ', ' + l.journal + ', ' + l.volume + ', ' + l.page if l.refid not in ref_index: ref_index[l.refid] = dict() ref_index[l.refid]['firstAU'] = l.firstAU ref_index[l.refid]['year'] = l.year ref_index[l.refid]['journal'] = l.journal ref_index[l.refid]['volume'] = l.volume ref_index[l.refid]['page'] = l.page ref_index[l.refid]['doi'] = l.doi ref_index[l.refid]['article'] = [] ref_index[l.refid]['SubCommID'] = -1 ref_index[l.refid]['modularity'] = -1 ref_index[l.refid]['article'].append(l.id) art_table[l.id].append(l.refid) if l.refid not in nA: nA[l.refid] = 1 else: nA[l.refid] += 1 nb_refs = len(nA) if verbose: print "....detect common articles" for foo in art_table: if (len(art_table[foo]) > 1): for i in art_table[foo]: for j in art_table[foo]: if (i < j): if i not in CC_table: CC_table[i] = dict() if j not in CC_table[i]: CC_table[i][j] = 0 CC_table[i][j] += 1 # 
choose threshold confirm = 'n' thr = 5 while confirm != 'y': if thr == 1: print "Keep BC links between articles sharing at least %d reference" % ( thr) else: print "Keep BC links between articles sharing at least %d references" % ( thr) confirm = raw_input("Confirm (y/n): ") while confirm not in ['n', 'y']: confirm = raw_input("...typing error!\n Confirm (y/n): ") if confirm == 'n': thr = input( "threshold for BC links -- articles should be share at least ? references:" ) ccthr = thr confirm = 'n' ref_journal_list = [ 'J OPER MANA IN PRESS', 'J OPER MANAG', 'J OPER MANAG FORTHCO', 'J OPERATIONS MANAGE', 'J OPERATIONS MANAGEM', 'J. Oper. Manag.', 'Journal of Operations Management', 'M&SOM-MANUF SERV OP', 'MANUF SERV IN PRESS', 'MANUF SERV OPER MANA', 'MANUF SERV OPERAT MA', 'MANUF SERVICE OPERAT', 'Manufacturing & Service Operations Management', 'MANUFACTURING SERVIC', 'PROD OPER M IN PRESS', 'PROD OPER MANAG', 'PROD OPERAT MANAGEM', 'Production and Operations Management', 'PRODUCTION OPER MANA', 'Production Oper. Management', 'PRODUCTION OPERATION', 'PRODUCTIONS OPERATIO' ] ref_journal_flag = False print "Do you want the journal of references belong to the list below?" for foo in ref_journal_list: print foo confirm = raw_input("Confirm (y/n): ") while confirm not in ['n', 'y']: confirm = raw_input("...typing error!\n Confirm (y/n): ") if confirm == 'y': ref_journal_flag = True ############################## ## BC COMMUNITIES if verbose: print "..CC communities" #... 
define BC network if verbose: print "....define graph in networkx format" G = nx.Graph() for i in CC_table: for j in CC_table[i]: if ((not ref_journal_flag) or (ref_journal_flag and ref_index[i]['journal'] in ref_journal_list and ref_index[j]['journal'] in ref_journal_list)) and (CC_table[i][j] >= thr): w_ij = (1.0 * CC_table[i][j]) / math.sqrt(nA[i] * nA[j]) G.add_edge(i, j, weight=w_ij) nx.draw_spring(G) dst = os.path.join( out_dir, 'CC-Network(ccthr=%d, thr=%d, ref_journal_flag=%s).png' % (ccthr, thr, ref_journal_flag)) plt.savefig(dst) plt.close('all') #... if verbose: print "....computing communities with Louvain algo" dendogram = community.generate_dendogram(G, part_init=None) #... output infos print "....There are %d references in the database (contain duplicates)" % ( nb_total_refs) print "....There are %d references in the database (contain no duplicate)" % ( nb_refs) print "....There are %d references in the CC network\n......(ie sharing at least %d article(s) with another reference)" % ( len(G.nodes()), ccthr) for level in range(len(dendogram)): part = community.partition_at_level(dendogram, level) mod = community.modularity(part, G) nb_comm = len(set(part.values())) size_sup10 = 0 size_sup100 = 0 #communities_caracteristics(partition, thr, level) for com in set(part.values()): list_nodes = [nodes for nodes in part.keys() if part[nodes] == com] if len(list_nodes) > 100: size_sup100 += 1 if len(list_nodes) > 10: size_sup10 += 1 print "....level %d: %d communities [%d with size > 10, %d with size > 100], modularity Q=%1.6f" % ( level, nb_comm, size_sup10, size_sup100, mod) ############################## ## WHICH EXTRACTION ? 
print "..CC communities extraction" # confirm = 'n' level = len(dendogram) - 1 thr = 0 while confirm != 'y': part = community.partition_at_level(dendogram, level) nb_comm = len(set(part.values())) size_sup_thr = 0 n_sup_thr = 0 for com in set(part.values()): list_nodes = [nodes for nodes in part.keys() if part[nodes] == com] if len(list_nodes) > thr: n_sup_thr += len(list_nodes) size_sup_thr += 1 print "....Extraction of level %d CC communities with size > %d\n......(%d articles gathered in %d communities):" % ( level, thr, n_sup_thr, size_sup_thr) confirm = raw_input("....do you confirm? (y/n): ") if confirm == 'n': level = input("......level you want to extract:") thr = input("......keep communities of size > to:") #... partition partition = community.partition_at_level(dendogram, level) list_nodes = dict() for com in set(partition.values()): list_nodes[com] = [ nodes for nodes in partition.keys() if partition[nodes] == com ] ############################# # sub-community partition subcomm = dict() for com in list_nodes: # plot SubGraph for each community if verbose: print "....plot SubGraph for community %d" % (com) subG = nx.subgraph(G, list_nodes[com]) nx.draw_spring(subG) dst = os.path.join(out_dir, 'SubGraph/Plot/SubGraph-%d.png' % (com)) plt.savefig(dst) plt.close('all') # partition if verbose: print "....sub clustering for community %d" % (com) part = community.best_partition(subG) # basic descriptive statistics comm_size = len(subG.nodes()) nb_comm = len(set(part.values())) subcomm[com] = dict() subcomm[com]['nb_comm'] = nb_comm subcomm[com]['size'] = comm_size mod = community.modularity(part, subG) # record each node's sub community id for refid in part.keys(): ref_index[refid]['SubCommID'] = part[refid] ref_index[refid]['modularity'] = mod if verbose: print "......comm_size:%d, nb_comm:%d, modularity:%1.6f" % ( comm_size, nb_comm, mod) # output gephi files if verbose: print "......generate gephi files for sub-community %d" % (com) name = 
"SubGraph/Gephi/SubCCnetwork%d(ccthr=%d, thr=%d, ref_journal_flag=%s).gdf" % ( com, ccthr, thr, ref_journal_flag) dst = os.path.join(out_dir, name) f_gephi = open(dst, 'w') # nodes f_gephi.write( "nodedef>name VARCHAR,label VARCHAR,CCcom VARCHAR, Sub CCcom VARCHAR, Modularity VARCHAR, firstAU VARCHAR,journal VARCHAR,year VARCHAR,nb_arts DOUBLE,doi VARCHAR, volume VARCHAR, page VARCHAR\n" ) for refid in part.keys(): foo = ref_index[refid]['firstAU'] + ', ' + ref_index[refid][ 'journal'] + ', ' + str(ref_index[refid]['year']) f_gephi.write( "%d,'%s',%s,%s,%1.6f,%s,%s,%d,%d,%s,%s,%s\n" % (refid, foo, str(com), str(ref_index[refid]['SubCommID']), ref_index[refid]['modularity'], ref_index[refid]['firstAU'], ref_index[refid]['journal'], ref_index[refid]['year'], nA[refid], ref_index[refid]['doi'], ref_index[refid]['volume'], ref_index[refid]['page'])) # edges f_gephi.write( "edgedef>node1 VARCHAR,node2 VARCHAR,weight DOUBLE,nb_comm_refs DOUBLE" ) for i in part.keys(): for j in part.keys(): if (i < j): if i in CC_table: if j in CC_table[i]: w_ij = (1.0 * CC_table[i][j]) / math.sqrt( nA[i] * nA[j]) f_gephi.write("\n%d,%d,%f,%d" % (i, j, w_ij, CC_table[i][j])) # end f_gephi.close() #.. 
comm_size comm_size = dict() for com in list_nodes: size = len(list_nodes[com]) comm_size[com] = size # sort community by its size comm_size = dict() for com in list_nodes: size = len(list_nodes[com]) comm_size[com] = size Lcomm_size = comm_size.items() Lcomm_size.sort(cmpval) ############################## # Research Base CSV files if verbose: print "..Research Base CSV files generating" filename = os.path.join(out_dir, "ResearchBase.dat") f_out = open(filename, "w") # header line f_out.write( "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % ('CommunityID', 'SubCommunityID', 'Modularity', 'Topic', 'SubTopic', 'RefID', 'Volume', 'Page', 'Lable', 'Title', 'Keywords', 'firstAU', 'Journal', 'Year', 'Citation', 'DOI')) for elm in Lcomm_size: com = elm[0] for ref in list_nodes[com]: foo = ref_index[ref]['firstAU'] + ', ' + ref_index[ref][ 'journal'] + ', ' + str(ref_index[ref]['year']) f_out.write( "%s\t%s\t%1.6f\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (str(com), str(ref_index[ref]['SubCommID']), ref_index[ref]['modularity'], '', '', str(ref), str(ref_index[ref]['volume']), str(ref_index[ref]['page']), foo, '', '', ref_index[ref]['firstAU'], ref_index[ref]['journal'], str(ref_index[ref]['year']), str(nA[ref]), ref_index[ref]['doi'])) f_out.close() if verbose: print "..Done!\n" ############################## # Research Front CSV files if verbose: print "..Research Front CSV files generating" filename = os.path.join(out_dir, "ResearchFront.dat") f_out = open(filename, "w") # header line f_out.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % ('CommunityID', 'SubCommunityID', 'RefID', 'DocID', 'DocLable', 'Title', 'Year', 'Citation', 'DE-Keywords', 'ID-Keywords', 'Abstract')) for elm in Lcomm_size: com = elm[0] for ref in list_nodes[com]: for doc in ref_index[ref]['article']: foo = doc_table[doc]['firstAU'] + ', ' + doc_table[doc][ 'journal'] + ', ' + str(doc_table[doc]['year']) 
f_out.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (str(com), str(ref_index[ref]['SubCommID']), str(ref), str(doc), foo, doc_table[doc]['title'], str(doc_table[doc]['year']), str(doc_table[doc]['citation']), doc_table[doc]['de_keywords'], doc_table[doc]['id_keywords'], doc_table[doc]['abstract'])) f_out.close() if verbose: print "..Done!\n" ############################# # OUTPUT GEPHI FILES #... output gephi # if verbose: print "..Preparing gephi gdf file for CC communities network" # ... ini # name = "CC_comm_level%d(ccthr=%d, thr=%d, ref_journal_flag=%s).gdf" % (level,ccthr,thr,ref_journal_flag) # dst = os.path.join(out_dir, name) # f_gephi = open(dst,'w') # ... prep nodes # if verbose: print "....nodes" # f_gephi.write("nodedef>name VARCHAR,label VARCHAR,size DOUBLE,inv_innerweight DOUBLE\n") # for com in comm_size: # if (comm_size[com] > thr) and (com in comm_label): f_gephi.write("%d,'%s',%d,%1.0f\n" % (com, comm_label[com], comm_size[com], comm_innerw[com]) ) # ... prep edges # if verbose: print "....edges" # f_gephi.write("edgedef>node1 VARCHAR,node2 VARCHAR,weight DOUBLE,logweight DOUBLE\n") # for com1 in list_nodes: # for com2 in list_nodes: # size1 = len(list_nodes[com1]); size2 = len(list_nodes[com2]); # if size1 > thr and size2 > thr and com1 > com2: # W = 0; # for id1 in list_nodes[com1]: # for id2 in list_nodes[com2]: # if id2 in G.edge[id1]: # W += G.edge[id1][id2]['weight'] # W *= 1000.0 / (size1 * size2) # if W > 0.000001: # f_gephi.write("%d,%d,%1.9f,%1.2f\n" % (com1, com2, W, 6 + math.log(W)/math.log(10)) ) # ... end # f_gephi.close() # if verbose: print"..Done!\n" ## ## ##... output the CC networks? confirm = raw_input( "..There are %d articles in the CC network.\n....do you want to create a gephi file with the CC networks at the articles level? (y/n): " % (len(G.nodes()))) if confirm == 'y': ## ... 
ini name = "CCnetwork(ccthr=%d, thr=%d, ref_journal_flag=%s).gdf" % ( ccthr, thr, ref_journal_flag) dst = os.path.join(out_dir, name) f_gephi = open(dst, 'w') ## ... prep nodes if verbose: print "....nodes" f_gephi.write( "nodedef>name VARCHAR,label VARCHAR,CCcom VARCHAR, Sub CCcom VARCHAR, Modularity VARCHAR, firstAU VARCHAR,journal VARCHAR,year VARCHAR,nb_arts DOUBLE,doi VARCHAR, volume VARCHAR, page VARCHAR\n" ) for refid in ref_index: if refid in partition: CCcom = partition[refid] if comm_size[CCcom] > thr: foo = ref_index[refid]['firstAU'] + ', ' + ref_index[ refid]['journal'] + ', ' + str( ref_index[refid]['year']) f_gephi.write( "%d,'%s',%s,%s,%1.6f,%s,%s,%d,%d,%s,%s,%s\n" % (refid, foo, str(CCcom), str(ref_index[refid]['SubCommID']), ref_index[refid]['modularity'], ref_index[refid]['firstAU'], ref_index[refid]['journal'], ref_index[refid]['year'], nA[refid], ref_index[refid]['doi'], ref_index[refid]['volume'], ref_index[refid]['page'])) ## ... prep edges if verbose: print "....edges" f_gephi.write( "edgedef>node1 VARCHAR,node2 VARCHAR,weight DOUBLE,nb_comm_refs DOUBLE" ) for i in CC_table: for j in CC_table[i]: if (i < j) and (i in partition) and (j in partition): commi_size = comm_size[partition[i]] commj_size = comm_size[partition[j]] if (commi_size > thr) and (commj_size > thr): w_ij = (1.0 * CC_table[i][j]) / math.sqrt( nA[i] * nA[j]) f_gephi.write("\n%d,%d,%f,%d" % (i, j, w_ij, CC_table[i][j])) ## ... end f_gephi.close() if verbose: print "..Done!\n" ############################## # Main Community Characteristics file type = "main" confirm = raw_input( "..Do you want to extract the characteristics for main communitise? 
\n Confirm (y/n):" ) if confirm == 'y': label = report.community_characteristics(in_dir, out_dir, type, ccthr, thr, ref_journal_flag, G, level, partition, list_nodes, art_table, verbose) ############################## # Sub Community Characteristics files if verbose: print "..Sub Computing communities caracteristics" confirm = raw_input( "..Do you want to extract the characteristics for sub communitise? \n Confirm (y/n):" ) if confirm == 'y': sub_label = dict() for com in list_nodes: type = str(com) subG = nx.subgraph(G, list_nodes[com]) level = len(community.generate_dendogram(subG)) - 1 sub_partition = community.best_partition(subG) sub_list_nodes = dict() for ref in sub_partition: sub_comm = sub_partition[ref] if sub_comm in sub_list_nodes: sub_list_nodes[sub_comm].append(ref) else: sub_list_nodes[sub_comm] = [] sub_label[com] = report.community_characteristics( in_dir, out_dir, type, ccthr, thr, ref_journal_flag, subG, level, sub_partition, sub_list_nodes, art_table, verbose, label) ############################## # Community Characteristics PDF generation confirm = raw_input( "..Do you want to generate the pdf files of characteristics for communitise? \n Confirm (y/n):" ) if confirm == 'y': report.latex(os.path.join(out_dir, "Report"), verbose) ## ################################### ## END return
def GenerateDendorgram(graph):
    """Compute the community dendrogram of `graph` and publish it globally.

    The result of community.generate_dendogram(graph) is stored in the
    module-level name `dendorgram`; nothing is returned.
    """
    global dendorgram
    hierarchy = community.generate_dendogram(graph)
    dendorgram = hierarchy
def print_dendrogram(self): dendo = community.generate_dendogram(self.G) for level in range(len(dendo) - 1): print "partition at level", level, "is", community.partition_at_level( dendo, level)
print col_name_list #creating a dictionary with key = 'id' and value = ids in ref_id mygraph = {} for row in list(res): key = row['id'] if row['ref_id'] is not u'' or None: val = map(int, row['ref_id'].strip().split(";")) mygraph[key] = val else: mygraph[key] = row['ref_id'] #generating the dendogram G = nx.from_dict_of_lists(mygraph) nx.write_adjlist(G, "test.adjlist") dendo = community.generate_dendogram(G) comDict = community.partition_at_level(dendo, 3) resultDict = {} #Calculating - Dissimilarity Matrix #for all pair of vertices find the dissimilarity matrix for (key1, val1), (key2, val2) in combinations(mygraph.items(), 2): #vertices common to both the vertices key1 and key2 will be #in intersection list of its values neighbours = [val for val in val1 if val in val2] dissimilarIndx = 0 for pair in combinations(neighbours, 2): for a, b in pair: if (comDict[a] != comDict[b]): dissimilarIndx = dissimilarIndx + 1
def __GenerateDendorgram(self):
    """Compute the community dendrogram of `self.m_graph`.

    The result is stored in the module-level name `dendorgram` (not on the
    instance); nothing is returned.
    """
    global dendorgram
    hierarchy = community.generate_dendogram(self.m_graph)
    dendorgram = hierarchy
# --- Modularity of the max-core and 4-core community sets -------------------
print("Computing modularities...")

max_core_count = len(max_core_communities)
print(">> max-core communities: {}".format(max_core_count))
modularity_max_core = cm.modularity(G, max_core_communities)
print(">> max-core modularity: {}".format(modularity_max_core))

four_core_count = len(four_cores_communities)
print(">> 4-core communities: {}".format(four_core_count))
modularity_four_core = cm.modularity(G, four_cores_communities)
print(">> 4-core modularity: {}".format(modularity_four_core))

# --- WCC of the first community of each set ---------------------------------
print("Computing wcc...")

first_max_core = max_core_communities[0]
wcc_max_core = cm.wcc(first_max_core, G)
print(">> max-core wcc: {}".format(wcc_max_core))

first_four_core = four_cores_communities[0]
wcc_four_core = cm.wcc(first_four_core, G)
four_core_size = first_four_core.number_of_nodes()
print(">> 4-core wcc: {} size: {}".format(wcc_four_core, four_core_size))

# --- Louvain: number of communities at every dendrogram level ---------------
print("Computing louvain...")
dendo = generate_dendogram(G)


def uniq(lst):
    """Return the number of distinct items in `lst`."""
    distinct = set(lst)
    return len(distinct)


louvain_steps = []
for prt in dendo:
    louvain_steps.append(uniq(prt.values()))
print(">> Louvain Steps:", louvain_steps)

# --- Collect everything that gets saved --------------------------------------
print("Saving to {}".format(OUT_FILE))
RESULT = {}
RESULT["max_core"] = max_core
RESULT["num_4-cores"] = num_4_core
RESULT["modularity_max-cores"] = modularity_max_core
RESULT["modularity_4-cores"] = modularity_four_core
RESULT["wcc_max-cores"] = wcc_max_core
RESULT["wcc_4-cores"] = wcc_four_core
RESULT["louvain_steps"] = louvain_steps
# NOTE(review): nesting reconstructed from a whitespace-collapsed source; the
# edge drawing and plt.show() are assumed to follow the loop rather than run
# once per community -- confirm against the original file.
# `parts`, `count`, `size`, `G_fb` and `spring_pos` are defined before this
# fragment.

# Draw the nodes of each community in its own grey level: the colour is the
# string form of count / size, with `count` incremented once per community.
for com in set(parts.values()) :
    count = count + 1.
    list_nodes = [nodes for nodes in parts.keys() if parts[nodes] == com]
    nx.draw_networkx_nodes(G_fb, spring_pos, list_nodes, node_size = 15, node_color = str(count / size))

nx.draw_networkx_edges(G_fb,spring_pos, alpha=0.5)
plt.show()

##### Dendo graph ######
dendo = community.generate_dendogram(G_fb)
# NOTE(review): stops at len(dendo) - 1, so the last (top) level is not
# printed -- confirm this is intended.
for level in range(len(dendo) - 1) :
    print "partition at level", level, "is", community.partition_at_level(dendo, level)

##### induced graph ####
# Build the graph induced by the partition `parts` on G_fb and draw it.
G=community.induced_graph(parts, G_fb)
#nx.draw_networkx(G, pos = spring_pos, cmap = plt.get_cmap("jet"), node_color = values, node_size = 15, with_labels = False)
nx.draw_networkx(G)
# NOTE(review): nesting reconstructed from a whitespace-collapsed source.
# `CC_table`, `nA`, `ref_index`, `ref_journal_flag`, `ref_journal_list`,
# `ccthr`, `verbose` and the nb_* counters are defined before this fragment.

# Build the weighted graph: an edge (i, j) is kept when its count reaches
# `ccthr` and, if the journal filter is on, both journals are in the list.
# The weight is the count divided by sqrt(nA[i] * nA[j]).
G=nx.Graph()
for i in CC_table:
    for j in CC_table[i]:
        if ((not ref_journal_flag) or (ref_journal_flag and ref_index[i]['journal'] in ref_journal_list and ref_index[j]['journal'] in ref_journal_list)) and (CC_table[i][j]>=ccthr):
            w_ij = (1.0 * CC_table[i][j]) / math.sqrt(nA[i] * nA[j])
            G.add_edge(i, j, weight=w_ij)

#... calculate basic centrality for each node
if verbose: print "..calculate basic centrality for each node"
degree = nx.degree_centrality(G)
closeness = nx.closeness_centrality(G)
betweenness = nx.betweenness_centrality(G)

#...
if verbose: print "....computing communities with Louvain algo"
dendogram = community.generate_dendogram(G, part_init=None)

#... output infos
print "....There are %d references in the database (contain duplicates)" % (nb_total_refs)
print "....There are %d references in the database (contain no duplicate)" % (nb_refs)
print "....There are %d references in the given journals (contain no duplicate)" % (nb_given_journals_refs)
print "....There are %d references in the CC network\n......(ie sharing at least %d article(s) with another reference)" % (len(G.nodes()), ccthr)

# For every dendrogram level, compute the modularity, the number of
# communities, and how many communities exceed 10 and 100 nodes.
for level in range(len(dendogram)):
    part = community.partition_at_level(dendogram, level)
    mod = community.modularity(part, G)
    nb_comm = len(set(part.values()))
    size_sup10 = 0; size_sup100 = 0;
    #communities_caracteristics(partition, thr, level)
    for com in set(part.values()) :
        # Rescans the whole partition for each community.
        list_nodes = [nodes for nodes in part.keys() if part[nodes] == com]
        if len(list_nodes) > 100: size_sup100 += 1
        if len(list_nodes) > 10: size_sup10 += 1