Example #1
 def prepare_communities(self):
     if hasattr(community, 'generate_dendrogram'):
         self.dendrogram = community.generate_dendrogram(self.g)
     else:
         self.dendrogram = community.generate_dendogram(self.g)
     for level in range(len(self.dendrogram)):
         pass 
 def test_modularity_increase(self):
     """
     Generate a dendrogram and test that modularity is always increasing
     """
     g = nx.erdos_renyi_graph(1000, 0.01)
     dendo = co.generate_dendogram(g)
     mod_prec = -1.
     mods = [co.modularity(co.partition_at_level(dendo, level), g) for level in range(len(dendo)) ]
     self.assertListEqual(mods, sorted(mods))
Example #3
    def __init__(self, directed_graph):
        self.directed_graph = directed_graph

        dendogram = community.generate_dendogram(self.directed_graph.to_undirected())
        partitions = community.partition_at_level(dendogram, len(dendogram)-1)
        communities = self._get_communities(partitions)
        major_communities = self._get_large_communities(communities)

        self.community_graphs = self._build_community_graphs(communities,
                                      valid_communities=major_communities)

        self.community_rankings = self._pagerank_communities(self.community_graphs)
def Mod(G,usebest=True,l=1):
	D = G.to_undirected()
	dendo = community.generate_dendogram(D, None)
	if usebest:
		level = len(dendo)-1
	else:
		level = l
	partition = community.partition_at_level(dendo,level)
	mod = community.modularity(partition, D)
	for n in G:
		G.node[n]['m'] = partition[n]
	return mod
 def test_nodes_stay_together(self):
     """
     Test that two nodes in the same community at one level stay in the same community at the next level
     """
     g = nx.erdos_renyi_graph(500, 0.01)
     dendo = co.generate_dendogram(g)
     parts = dict([])
     for l in range(len(dendo)) :
         parts[l] = co.partition_at_level(dendo, l)
     for l in range(len(dendo)-1) :
         p1 = parts[l]
         p2 = parts[l+1]
         coms = set(p1.values())
         for com in coms :
             comhigher = [ p2[node] for node, comnode in p1.iteritems() if comnode == com]
             self.assertEqual(len(set(comhigher)), 1)
def louvain(adjacency_matrix):
    """
    Performs community embedding using the LOUVAIN method.

    Introduced in: Blondel, V. D., Guillaume, J. L., Lambiotte, R., & Lefebvre, E. (2008).
                   Fast unfolding of communities in large networks.
                   Journal of Statistical Mechanics: Theory and Experiment, 2008(10), P10008.

    Inputs:  - A in R^(nxn): Adjacency matrix of an undirected network represented as a SciPy Sparse COOrdinate matrix.

    Outputs: - X in R^(nxC_n): The latent space embedding represented as a SciPy Sparse COOrdinate matrix.
    """
    # Convert to networkx undirected graph.
    adjacency_matrix = nx.from_scipy_sparse_matrix(adjacency_matrix,
                                                   create_using=nx.Graph())

    # Call LOUVAIN algorithm to calculate a hierarchy of communities.
    tree = community.generate_dendogram(adjacency_matrix, part_init=None)

    # Embed communities
    row = list()
    col = list()
    append_row = row.append
    append_col = col.append

    community_counter = 0
    for i in range(len(tree)):
        partition = community.partition_at_level(tree, i)
        for n, c in partition.items():
            append_row(n)
            append_col(community_counter + c)

        community_counter += max(partition.values()) + 1

    row = np.array(row)
    col = np.array(col)
    data = np.ones(row.size, dtype=np.float64)

    louvain_features = sparse.coo_matrix(
        (data, (row, col)),
        shape=(len(partition.keys()), community_counter),
        dtype=np.float64)

    return louvain_features
Example #8
    def predict(self):
        """Predict using community structure

        If two nodes belong to the same community, they are predicted to form
        a link. This uses the Louvain algorithm, which determines communities
        at different granularity levels: the finer grained the community, the
        higher the resulting score.

        You'll need to install Thomas Aynaud's python-louvain package from
        https://bitbucket.org/taynaud/python-louvain for this.

        """
        try:
            from community import generate_dendogram, partition_at_level
        except ImportError:
            raise ImportError("Module 'community' could not be found. "
                              "Please install python-louvain from "
                              "https://bitbucket.org/taynaud/python-louvain")
        from collections import defaultdict

        res = Scoresheet()
        dendogram = generate_dendogram(self.G)

        for i in range(len(dendogram)):
            partition = partition_at_level(dendogram, i)
            communities = defaultdict(list)
            weight = len(dendogram) - i  # Lower i, smaller communities

            for n, com in six.iteritems(partition):
                communities[com].append(n)
            for nodes in six.itervalues(communities):
                for u, v in all_pairs(nodes):
                    if not self.eligible(u, v):
                        continue
                    res[(u, v)] += weight
        return res
Example #10
	#... define CC network
	if verbose: print "....define graph in networkx format"
	G=nx.Graph()
	for i in CC_table:
		for j in CC_table[i]:
			if ((not ref_journal_flag) or (ref_journal_flag and ref_index[i]['journal'] in ref_journal_list and ref_index[j]['journal'] in ref_journal_list)) and (CC_table[i][j]>=thr):
				w_ij = (1.0 * CC_table[i][j]) / math.sqrt(nA[i] * nA[j])
				G.add_edge(i, j, weight=w_ij)
	nx.draw_spring(G)
	dst = os.path.join(out_dir, 'CC-Network(ccthr=%d, thr=%d, ref_journal_flag=%s).png' % (ccthr, thr, ref_journal_flag))
	plt.savefig(dst)
	plt.close('all')
	
	#...
	if verbose: print "....computing communities with Louvain algo"
	dendogram = community.generate_dendogram(G, part_init=None)

	#... output infos
	print "....There are %d references in the database (contain duplicates)" % (nb_total_refs)
	print "....There are %d references in the database (contain no duplicate)" % (nb_refs)
	print "....There are %d references in the CC network\n......(ie sharing at least %d article(s) with another reference)" % (len(G.nodes()), ccthr)
	for level in range(len(dendogram)):
		part = community.partition_at_level(dendogram, level)
		mod = community.modularity(part, G)
		nb_comm = len(set(part.values()))
		size_sup10 = 0; size_sup100 = 0;  #communities_caracteristics(partition, thr, level)
		for com in set(part.values()) :
			list_nodes = [nodes for nodes in part.keys() if part[nodes] == com]
			if len(list_nodes) > 100: size_sup100 += 1
			if len(list_nodes) > 10: size_sup10 += 1
		print "....level %d: %d communities [%d with size > 10, %d with size > 100], modularity Q=%1.6f" % (level, nb_comm, size_sup10, size_sup100, mod)
#  Copyright (c) 2010 Howard Hughes Medical Institute.
#  All rights reserved.
#  Use is subject to Janelia Farm Research Campus Software Copyright 1.1 license terms.
#  http://license.janelia.org/license/jfrc_copyright_1_1.html

"""
A script to demonstrate community detection.
Uses the community module bundled with Neuroptikon and written by Thomas Aynaud <http://perso.crans.org/aynaud/communities/>.
"""

import community

updateProgress(gettext('Finding communities...'), forceDisplay = True)
dendogram = community.generate_dendogram(network.simplifiedGraph())
updateProgress(gettext('Finding communities...'))
partition = community.best_partition(dendogram)

if any(partition):
    updateProgress(gettext('Isolating communities...'))
    for visibles in list(display.visibles.itervalues()):
        for visible in visibles:
            if visible.isPath() and not isinstance(visible.client, Stimulus):
                startCommunity, endCommunity = [partition[node.client.networkId] for node in visible.pathEndPoints()]
                if startCommunity != endCommunity:
                    display.removeVisible(visible)
    
    updateProgress(gettext('Visually separating communities...'))
    display.setViewDimensions(2)
    for obj in network.objects:
        display.setVisiblePosition(obj, fixed = False)
    try:
Example #12
            link_list,
            jfile,
        )
        evt += 1  # can't be annotated
        if evt > 0:
            break

print 'fnum ' + str(fnum)
uids = cntuids.keys()  # id of all users
pnum = len(uids)
print 'pnum ' + str(pnum)

matrix = [{} for i in xrange(pnum)]
layout_matrix = [{} for i in xrange(pnum)]
G = build_G(pnum, uids, matrix, layout_matrix)
dendo = cm.generate_dendogram(G)
partition = cm.partition_at_level(dendo, len(dendo) - 1)

layout_G = build_layout_G(layout_matrix)
print 'nx.spring_layout'
begin = time.time()
pos = nx.spring_layout(layout_G, iterations=30)  # default 50
print 'pos ' + (str(time.time() - begin))

uid_loc = {}
uid_cls = {}
cat_cnt = [0 for i in xrange(cnum)]
group = {}
for i in xrange(len(uids)):
    cat_cnt[partition[i]] += 1
    uid_loc[uids[i]] = [float(pos[i][0]), float(pos[i][1])]
    #best partition calculation
    cluster2 = community.best_partition(graph)

    #print run time for c2
    runtimeC2 = timeit.default_timer() - startC2

    mod2 = community.modularity(cluster2, graph)



    graph = nx.read_edgelist("Data/a.data")

    #timer start c3
    startC3 = timeit.default_timer()

    tmp = community.generate_dendogram(graph)
    cluster3 = community.partition_at_level(tmp, 0)

    #print run time for c3
    runtimeC3 = timeit.default_timer() - startC3

    mod3 = community.modularity(cluster3, graph)

    print "modularity:  1:%f;  2:%f;  3:%f" % (mod1,mod2, mod3)
    nmi1 = calculate_NMI(cluster1, cluster2)
    print "nmi between cluster1 and cluster 2: %.10f" % nmi1
    nmi2 = calculate_NMI(cluster1, cluster3)
    print "nmi between cluster1 and cluster 3: %.10f" % nmi2
    nmi3 = calculate_NMI(cluster2, cluster3)
    print "nmi between cluster2 and cluster 3: %.10f" % nmi3
Example #14
        clust = nx.average_clustering(net)
        add("clustering", clust)


        mcc = max(nx.connected_component_subgraphs(net),key=len)
        ecc = nx.eccentricity(mcc)
        min_ecc = min(ecc.values())
        size_center = sum([1 for node in mcc if ecc[node]<=min_ecc+1])
        diameter = max(ecc.values())
        ed =effective_diameter(net)
        add("diameter", diameter)
        add("effective_diameter",ed)
        add("size_center", size_center)

        dendo = com.generate_dendogram(net)
        dic_com_nodes=defaultdict(list)
        for node,community in dendo[0].iteritems():
            dic_com_nodes[community].append(node)
        mod = com.modularity(dendo[0],net)
        add("nb_communities", len(dic_com_nodes))
        add("modularity", mod)

        path_to_file = file.replace(".gexf","")
        write_edgelist(path_to_file)
        write_motifs(path_to_file)
        for motif, score in analyse_motifs(path_to_file):
            add("motif_"+str(motif), score)

if not os.path.exists("results_a/"):
    os.makedirs("results_a/")
 def print_dendrogram(self):
     dendo = community.generate_dendogram(self.G)
     for level in range(len(dendo) - 1) :
         print "partition at level", level, "is", community.partition_at_level(dendo, level)
Example #16
def BC_network(in_dir, out_dir, verbose):

    ## INPUT DATA
    if verbose: print "..Initialize"

    src1 = os.path.join(in_dir, "articles.dat")
    src5 = os.path.join(in_dir, "references.dat")

    Ymin = 2100
    Ymax = 1900  # store the min and max publication year
    nR = dict()  # store the number of refs of the articles
    pl = Utils.Article()
    pl.read_file(src1)
    nb_art = len(pl.articles)  # store the number of articles within database
    for l in pl.articles:
        nR[l.id] = 0
        if (l.year > 1900 and l.year < 2100):
            if (l.year > Ymax): Ymax = l.year
            if (l.year < Ymin): Ymin = l.year

    ## CREATE BC WEIGHT TABLE
    if verbose: print "..Create the 'Bibliographic Coupling' weight table"

    ref_table = dict()  # store the id of articles using a given ref
    BC_table = dict(
    )  # store the number of common refs between pairs of articles

    if verbose: print "....loading refs table"
    pl = Utils.Ref()
    pl.read_file(src5)
    for l in pl.refs:
        foo = l.firstAU + ', ' + str(
            l.year) + ', ' + l.journal + ', ' + l.volume + ', ' + l.page
        if foo in ref_table: ref_table[foo].append(l.id)
        else: ref_table[foo] = [l.id]
        nR[l.id] += 1

    if verbose: print "....detecting common references"
    for foo in ref_table:
        if len(ref_table[foo]) > 1:
            for i in ref_table[foo]:
                for j in ref_table[foo]:
                    if i < j:
                        if i not in BC_table: BC_table[i] = dict()
                        if j not in BC_table[i]: BC_table[i][j] = 0
                        BC_table[i][j] += 1
    """
  ## EXPORT BC WEIGHT TABLE
  if verbose: print "Output the BC_weight table"
  filename = os.path.join(in_dir, "BCweight.txt")
  f_BC = open(filename,'w')
  for i in BC_table:
	for j in BC_table[i]:
	  w_ij = (1.0 * BC_table[i][j]) / math.sqrt(nR[i] * nR[j])
	  #f_BC.write("%d\t%d\t%f\t%d\n" % (i, j, w_ij, BC_table[i][j]) )
	  if i > j: f_BC.write("%d %d %1.7f\n" % (i, j, w_ij) )
  f_BC.close()
  """

    # choose threshold
    confirm = 'n'
    thr = 1

    while confirm != 'y':
        if thr == 1:
            print "Keep BC links between articles sharing at least %d reference" % (
                thr)
        else:
            print "Keep BC links between articles sharing at least %d references" % (
                thr)
        confirm = raw_input("Confirm (y/n): ")
        while confirm not in ['n', 'y']:
            confirm = raw_input("...typing error!\n Confirm (y/n): ")
        if confirm == 'n':
            thr = input(
                "threshold for BC links -- articles should be share at least ? references:"
            )

    bcthr = thr

    ##############################
    ## BC COMMUNITIES
    if verbose: print "..BC communities"
    #... define BC network
    if verbose: print "....define graph in networkx format"
    G = nx.Graph()
    for i in BC_table:
        for j in BC_table[i]:
            if BC_table[i][j] >= thr:
                w_ij = (1.0 * BC_table[i][j]) / math.sqrt(nR[i] * nR[j])
                G.add_edge(i, j, weight=w_ij)

    #...
    if verbose: print "....computing communities with Louvain algo"
    dendogram = community.generate_dendogram(G, part_init=None)

    #... output infos
    print "....There are %d articles in the database" % (nb_art)
    print "....There are %d articles in the BC network\n......(ie sharing at least one reference with another article)" % (
        len(G.nodes()))
    for level in range(len(dendogram)):
        part = community.partition_at_level(dendogram, level)
        mod = community.modularity(part, G)
        nb_comm = len(set(part.values()))
        size_sup10 = 0
        size_sup100 = 0
        #communities_caracteristics(partition, thr, level)
        for com in set(part.values()):
            list_nodes = [nodes for nodes in part.keys() if part[nodes] == com]
            if len(list_nodes) > 100: size_sup100 += 1
            if len(list_nodes) > 10: size_sup10 += 1
        print "....level %d: %d communities [%d with size > 10, %d with size > 100], modularity Q=%1.6f" % (
            level, nb_comm, size_sup10, size_sup100, mod)

    ##############################
    ## WHICH EXTRACTION ?
    print "..BC communities extraction"
    #
    confirm = 'n'
    level = len(dendogram) - 1
    thr = 10
    while confirm != 'y':
        part = community.partition_at_level(dendogram, level)
        nb_comm = len(set(part.values()))
        size_sup_thr = 0
        n_sup_thr = 0
        for com in set(part.values()):
            list_nodes = [nodes for nodes in part.keys() if part[nodes] == com]
            if len(list_nodes) > thr:
                n_sup_thr += len(list_nodes)
                size_sup_thr += 1
        print "....Extraction of level %d BC communities with size > %d\n......(%d articles gathered in %d communities):" % (
            level, thr, n_sup_thr, size_sup_thr)
        confirm = raw_input("....do you confirm? (y/n): ")
        if confirm == 'n':
            level = input("......level you want to extract:")
            thr = input("......keep communities of size > to:")

    #... partition
    partition = community.partition_at_level(dendogram, level)
    list_nodes = dict()
    for com in set(partition.values()):
        list_nodes[com] = [
            nodes for nodes in partition.keys() if partition[nodes] == com
        ]

    ##############################
    ## COMMUNITIES CHARACTERISTICS
    if verbose: print "..Computing communities characteristics"
    #.. ini
    filename = os.path.join(
        out_dir, "BCcomm_ID_Cards(bcthr=%d, thr=%d).tex" % (bcthr, thr))
    f_out = open(filename, "w")
    f_out.write(
        "\documentclass[a4paper,11pt]{report}\n\usepackage[english]{babel}\n\usepackage[latin1]{inputenc}\n\usepackage{amsfonts,amssymb,amsmath}\n\usepackage{pdflscape}\n\usepackage{color}\n\n\\addtolength{\evensidemargin}{-60pt}\n\\addtolength{\oddsidemargin}{-60pt}\n\\addtolength{\\textheight}{80pt}\n\n\\title{{\\bf Communities ID Cards}}\n\date{\\begin{flushleft}This document gather the ``ID Cards'' of the BC communities found within your database.\\\\\n The BC network was built by keeping a link between articles sharing at least %d references. The communities characterized here correspond to the ones found in the level %d (in the sense of the Louvain algo) which gathers more than %d articles.\\\\\n These ID cards displays the most frequent keywords, subject categories, journals of publication, institution, countries, authors, references and reference journals of the articles of each community. The significance of an item $\sigma = \sqrt{N} (f - p) / \sqrt{p(1-p)}$ [where $N$ is the number of articles within the community and $f$ and $p$ are the proportion of articles respectively within the community and within the database displaying that item ] is also given (for example $\sigma > 5$ is really highly significant).\\\\\n\\vspace{1cm}\n\copyright Sebastian Grauwin, Liu Weizhi - (2014) \end{flushleft}}\n\n\\begin{document}\n\\begin{landscape}\n\maketitle\n"
        % (bcthr, level, thr))

    #.. quantitative
    comm_innerw = dict()
    comm_size = dict()
    for com in list_nodes:
        size = len(list_nodes[com])
        W = 0
        for id1 in list_nodes[com]:
            for id2 in list_nodes[com]:
                if id2 > id1 and id2 in G.edge[id1]:
                    W += G.edge[id1][id2]['weight']
        W *= 2.0 / (size * (size - 1))
        comm_innerw[com] = 1.0 / W
        comm_size[com] = size
    Lcomm_size = comm_size.items()
    Lcomm_size.sort(cmpval)

    #.. frequency / significance of keywords, etc...
    comm_label = dict()
    (stuffK, stuffS, stuffJ, stuffA, stuffI, stuffC, stuffR,
     stuffRJ) = BCUtils.comm_tables(in_dir, partition, thr, verbose)

    #.. output tables
    for elm in Lcomm_size:
        if elm[1] > thr:
            com = elm[0]
            #K
            if com in stuffK:
                if len(stuffK[com]) > 0: comm_label[com] = stuffK[com][0][0]
                else: comm_label[com] = 'XXXX'
                f_out.write(
                    "\clearpage\n\n\\begin{table}[!ht]\n\caption{The community ``%s'' contains $N = %d$ articles. Its average internal link weight is $<\omega_{in}> \simeq 1/%d$ }\n\\textcolor{white}{aa}\\\\\n{\scriptsize\\begin{tabular}{|l r r|}\n\hline\nKeyword & f(\\%%) & $\sigma$\\\\\n\hline\n"
                    % (comm_label[com], comm_size[com], comm_innerw[com]))
                for i in range(len(stuffK[com])):
                    if len(stuffK[com][i][0]) < 30:
                        f_out.write("%s & %1.2f & %1.2f\\\\\n" %
                                    (stuffK[com][i][0], stuffK[com][i][1],
                                     stuffK[com][i][2]))
                    else:
                        aux = stuffK[com][i][0].rfind(' ')
                        while aux > 30:
                            aux = stuffK[com][i][0][0:aux].rfind(' ')
                        f_out.write("%s &  & \\\\\n" %
                                    (stuffK[com][i][0][0:aux]))
                        f_out.write("$\quad$%s & %1.2f & %1.2f\\\\\n" %
                                    (stuffK[com][i][0][aux:],
                                     stuffK[com][i][1], stuffK[com][i][2]))
                for i in range(max(0, 20 - len(stuffK[com]))):
                    f_out.write(" &  & \\\\\n")
            else:
                f_out.write(
                    "\clearpage\n\n\\begin{table}[!ht]\n\caption{The community ``?'' contains $N = %d$ articles. Its average internal link weight is $<\omega_{in}> \simeq 1/%d$ }\n\\textcolor{white}{aa}\\\\\n{\scriptsize\\begin{tabular}{|l r r|}\n\hline\nKeyword & f(\\%%) & $\sigma$\\\\\n\hline\n"
                    % (comm_size[com], comm_innerw[com]))
                for i in range(20):
                    f_out.write(" &  & \\\\\n")
            #S
            f_out.write(
                "\hline\n\hline\nSubject & f(\\%) & $\sigma$\\\\\n\hline\n")
            if com in stuffS:
                for i in range(len(stuffS[com])):
                    f_out.write("%s & %1.2f & %1.2f\\\\\n" %
                                (stuffS[com][i][0], stuffS[com][i][1],
                                 stuffS[com][i][2]))
                for i in range(max(0, 10 - len(stuffS[com]))):
                    f_out.write(" &  & \\\\\n")
            else:
                for i in range(10):
                    f_out.write(" &  & \\\\\n")
            #J
            f_out.write(
                "\hline\n\hline\nJournal & f(\\%) & $\sigma$\\\\\n\hline\n")
            if com in stuffJ:
                for i in range(len(stuffJ[com])):
                    f_out.write("%s & %1.2f & %1.2f\\\\\n" %
                                (stuffJ[com][i][0], stuffJ[com][i][1],
                                 stuffJ[com][i][2]))
                for i in range(max(0, 10 - len(stuffJ[com]))):
                    f_out.write(" &  & \\\\\n")
            else:
                for i in range(10):
                    f_out.write(" &  & \\\\\n")
            f_out.write("\hline\n\end{tabular}\n}\n")
            #f_out.write("\hline\n\end{tabular}\n}\n\end{table}\n\n")
            #I
            f_out.write(
                "{\scriptsize\\begin{tabular}{|l r r|}\n\hline\nInstitution & f(\\%) & $\sigma$\\\\\n\hline\n"
            )
            if com in stuffI:
                for i in range(len(stuffI[com])):
                    if len(stuffI[com][i][0]) < 30:
                        f_out.write("%s & %1.2f & %1.2f\\\\\n" %
                                    (stuffI[com][i][0], stuffI[com][i][1],
                                     stuffI[com][i][2]))
                    else:
                        aux = stuffI[com][i][0].rfind(' ')
                        while aux > 30:
                            aux = stuffI[com][i][0][0:aux].rfind(' ')
                        f_out.write("%s &  & \\\\\n" %
                                    (stuffI[com][i][0][0:aux]))
                        f_out.write("$\quad$%s & %1.2f & %1.2f\\\\\n" %
                                    (stuffI[com][i][0][aux:],
                                     stuffI[com][i][1], stuffI[com][i][2]))
                for i in range(max(0, 20 - len(stuffI[com]))):
                    f_out.write(" &  & \\\\\n")
            else:
                for i in range(20):
                    f_out.write(" &  & \\\\\n")
            #C
            f_out.write(
                "\hline\n\hline\nCountry & f(\\%) & $\sigma$\\\\\n\hline\n")
            if com in stuffC:
                for i in range(len(stuffC[com])):
                    f_out.write("%s & %1.2f & %1.2f\\\\\n" %
                                (stuffC[com][i][0], stuffC[com][i][1],
                                 stuffC[com][i][2]))
                for i in range(max(0, 10 - len(stuffC[com]))):
                    f_out.write(" &  & \\\\\n")
            else:
                for i in range(10):
                    f_out.write(" &  & \\\\\n")
            #A
            f_out.write(
                "\hline\n\hline\nAuthor & f(\\%) & $\sigma$\\\\\n\hline\n")
            if com in stuffA:
                for i in range(len(stuffA[com])):
                    f_out.write("%s & %1.2f & %1.2f\\\\\n" %
                                (stuffA[com][i][0], stuffA[com][i][1],
                                 stuffA[com][i][2]))
                for i in range(max(0, 10 - len(stuffA[com]))):
                    f_out.write(" &  & \\\\\n")
            else:
                for i in range(10):
                    f_out.write(" &  & \\\\\n")
            f_out.write("\hline\n\end{tabular}\n}\n")
            #R
            f_out.write(
                "{\scriptsize\\begin{tabular}{|l r r|}\n\hline\nReference & f(\\%) & $\sigma$\\\\\n\hline\n"
            )
            if com in stuffR:
                for i in range(len(stuffR[com])):
                    if len(stuffR[com][i][0]) < 50:
                        f_out.write("%s & %1.2f & %1.2f\\\\\n" %
                                    (stuffR[com][i][0], stuffR[com][i][1],
                                     stuffR[com][i][2]))
                    elif len(stuffR[com][i][0]) < 90:
                        aux = stuffR[com][i][0].rfind(' ')
                        while aux > 50:
                            aux = stuffR[com][i][0][0:aux].rfind(' ')
                        f_out.write("%s &  & \\\\\n" %
                                    (stuffR[com][i][0][0:aux]))
                        f_out.write("$\quad$%s & %1.2f & %1.2f\\\\\n" %
                                    (stuffR[com][i][0][aux:],
                                     stuffR[com][i][1], stuffR[com][i][2]))
                    else:
                        aux1 = stuffR[com][i][0].rfind(' ')
                        while aux1 > 90:
                            aux1 = stuffR[com][i][0][0:aux1].rfind(' ')
                        aux2 = stuffR[com][i][0][0:aux1].rfind(' ')
                        while aux2 > 50:
                            aux2 = stuffR[com][i][0][0:aux2].rfind(' ')
                        f_out.write("%s &  & \\\\\n" %
                                    (stuffR[com][i][0][0:aux2]))
                        f_out.write("$\quad$%s &  & \\\\\n" %
                                    (stuffR[com][i][0][aux2:aux1]))
                        f_out.write("$\quad$%s & %1.2f & %1.2f\\\\\n" %
                                    (stuffR[com][i][0][aux1:],
                                     stuffR[com][i][1], stuffR[com][i][2]))
                for i in range(max(0, 25 - len(stuffR[com]))):
                    f_out.write(" &  & \\\\\n")
            else:
                for i in range(25):
                    f_out.write(" &  & \\\\\n")
            #RJ
            f_out.write(
                "\hline\n\hline\nRefJournal & f(\\%) & $\sigma$\\\\\n\hline\n")
            if com in stuffRJ:
                for i in range(len(stuffRJ[com])):
                    if len(stuffRJ[com][i][0]) < 50:
                        f_out.write("%s & %1.2f & %1.2f\\\\\n" %
                                    (stuffRJ[com][i][0], stuffRJ[com][i][1],
                                     stuffRJ[com][i][2]))
                    else:
                        aux = stuffRJ[com][i][0].rfind(' ')
                        while aux > 50:
                            aux = stuffRJ[com][i][0][0:aux].rfind(' ')
                        f_out.write("%s &  & \\\\\n" %
                                    (stuffRJ[com][i][0][0:aux]))
                        f_out.write("$\quad$%s &  & \\\\\n" %
                                    (stuffRJ[com][i][0][aux:]))
                for i in range(max(0, 10 - len(stuffRJ[com]))):
                    f_out.write(" &  & \\\\\n")
            else:
                for i in range(10):
                    f_out.write(" &  & \\\\\n")
            f_out.write("\hline\n\end{tabular}\n}\n\end{table}\n\n")

    #.. end
    f_out.write("\end{landscape}\n\n\end{document}\n")
    f_out.close()
    if verbose:
        print "..Communities caracteristics extracted in .tex 'IDCards' file"

    ##############################
    ## OUTPUT GEPHI FILES

    #... output gephi
    if verbose: print "..Preparing gephi gdf file for BC communities network"

    ## ... ini
    name = "BC_comm_level%d(bcthr=%d, thr=%d).gdf" % (level, bcthr, thr)
    dst = os.path.join(out_dir, name)
    f_gephi = open(dst, 'w')
    ## ... prep nodes
    if verbose: print "....nodes"
    f_gephi.write(
        "nodedef>name VARCHAR,label VARCHAR,size DOUBLE,inv_innerweight DOUBLE\n"
    )
    for com in comm_size:
        if comm_size[com] > thr:
            f_gephi.write(
                "%d,'%s',%d,%1.0f\n" %
                (com, comm_label[com], comm_size[com], comm_innerw[com]))
    ## ... prep edges
    if verbose: print "....edges"
    f_gephi.write(
        "edgedef>node1 VARCHAR,node2 VARCHAR,weight DOUBLE,logweight DOUBLE\n")
    for com1 in list_nodes:
        for com2 in list_nodes:
            size1 = len(list_nodes[com1])
            size2 = len(list_nodes[com2])
            if size1 > thr and size2 > thr and com1 > com2:
                W = 0
                for id1 in list_nodes[com1]:
                    for id2 in list_nodes[com2]:
                        if id2 in G.edge[id1]:
                            W += G.edge[id1][id2]['weight']
                W *= 1000.0 / (size1 * size2)
                if W > 0.000001:
                    f_gephi.write(
                        "%d,%d,%1.9f,%1.2f\n" %
                        (com1, com2, W, 6 + math.log(W) / math.log(10)))
    ## ... end
    f_gephi.close()
    if verbose: print "..Done!\n"

    ##
    ##

    ##... output the BC networks?
    confirm = raw_input(
        "..There are %d articles in the BC network.\n....do you want to create a gephi file with the BC networks at the articles level? (y/n): "
        % (len(G.nodes())))
    if confirm == 'y':
        ## ... ini
        name = "BCnetwork(bcthr=%d, thr=%d).gdf" % (bcthr, thr)
        dst = os.path.join(out_dir, name)
        f_gephi = open(dst, 'w')
        ## ... prep nodes
        if verbose: print "....nodes"
        f_gephi.write(
            "nodedef>name VARCHAR,label VARCHAR,BCcom VARCHAR,firstAU VARCHAR,journal VARCHAR,year VARCHAR,nb_refs DOUBLE\n"
        )
        pl = Utils.Article()
        pl.read_file(src1)
        for l in pl.articles:
            if l.id in partition:
                BCcom = partition[l.id]
                if comm_size[BCcom] > thr:
                    foo = l.firstAU + ', ' + l.journal + ', ' + str(l.year)
                    f_gephi.write("%d,'%s',%s,%s,%s,%d,%d\n" %
                                  (l.id, foo, str(BCcom), l.firstAU, l.journal,
                                   l.year, nR[l.id]))
        ## ... prep edges
        if verbose: print "....edges"
        f_gephi.write(
            "edgedef>node1 VARCHAR,node2 VARCHAR,weight DOUBLE,nb_comm_refs DOUBLE"
        )
        for i in BC_table:
            for j in BC_table[i]:
                if (i < j) and (i in partition) and (j in partition):
                    comi_size = comm_size[partition[i]]
                    comj_size = comm_size[partition[j]]
                    if (comi_size > thr) and (comj_size > thr):
                        w_ij = (1.0 * BC_table[i][j]) / math.sqrt(
                            nR[i] * nR[j])
                        f_gephi.write("\n%d,%d,%f,%d" %
                                      (i, j, w_ij, BC_table[i][j]))
        ## ... end
        f_gephi.close()
    if verbose: print "..Done!\n"

    ## ###################################
    ## END
    return
Example #18
import networkx as nx
import community
from stratagies import *
import Queue
G = nx.read_edgelist("nets/"+"GTCom-lj (4833).ungraph.txt", delimiter=",")

split_biggest_comm(G, 689, 1)

x = community.generate_dendogram(G)
print x

import json
from util.read_utils import lines_per_n
import community
import networkx as nx

author_graph = nx.DiGraph()
with open('clean_data.json', 'r') as jfile:
    for chunk in lines_per_n(jfile, 9):
        hdr_data = json.loads(chunk)
        for to_addr in str(hdr_data['To']).split(","):
            if '@' in to_addr:
                author_graph.add_edge(str(hdr_data['From']), to_addr.strip(), style='solid', label=hdr_data['Time'])
        for cc_addr in str(hdr_data['Cc']).split(","):
            if '@' in cc_addr:
                author_graph.add_edge(str(hdr_data['From']), cc_addr.strip(), style='dashed', label=hdr_data['Time'])
    jfile.close()

print("No. of Weakly Connected Components:", nx.number_weakly_connected_components(author_graph))
print("No. of Strongly Connected Components:", nx.number_strongly_connected_components(author_graph))
print("Nodes:", nx.number_of_nodes(author_graph))
print("Edges:", nx.number_of_edges(author_graph))

#The following lines of code generate a dendrogram for the above graph
dendo = community.generate_dendogram(author_graph.to_undirected())
for level in range(len(dendo)) :
    print("Partition at level", level, "is", community.partition_at_level(dendo, level))
    print("-"*10)
Example #20
def CC_network(in_dir, out_dir, verbose):

    ## INPUT DATA
    if verbose: print "..Initialize"
    src1 = os.path.join(in_dir, "articles.dat")
    src5 = os.path.join(in_dir, "references.dat")

    Ymin = 2100
    Ymax = 1900
    pl = Utils.Article()
    pl.read_file(src1)
    nb_art = len(pl.articles)
    art_table = dict()
    for i in range(nb_art):
        art_table[i] = []
    doc_table = dict()
    id = 0
    for l in pl.articles:
        doc_table[id] = dict()
        doc_table[id]['firstAU'] = l.firstAU
        doc_table[id]['year'] = l.year
        doc_table[id]['journal'] = l.journal
        doc_table[id]['citation'] = l.times_cited
        doc_table[id]['title'] = l.title
        doc_table[id]['de_keywords'] = l.de_keywords
        doc_table[id]['id_keywords'] = l.id_keywords
        doc_table[id]['abstract'] = l.abstract
        id = id + 1

    for l in pl.articles:
        if (l.year > 1900 and l.year < 2100):
            if (l.year > Ymax): Ymax = l.year
            if (l.year < Ymin): Ymin = l.year

    if verbose: print "..Create Co-citation Network weight table"

    if verbose: print "....loading refs table"
    pl = Utils.Ref()
    pl.read_file(src5)
    nb_total_refs = len(pl.refs)
    CC_table = dict()
    nA = dict()
    ref_index = dict()
    for l in pl.refs:
        foo = l.firstAU + ', ' + str(
            l.year) + ', ' + l.journal + ', ' + l.volume + ', ' + l.page
        if l.refid not in ref_index:
            ref_index[l.refid] = dict()
            ref_index[l.refid]['firstAU'] = l.firstAU
            ref_index[l.refid]['year'] = l.year
            ref_index[l.refid]['journal'] = l.journal
            ref_index[l.refid]['volume'] = l.volume
            ref_index[l.refid]['page'] = l.page
            ref_index[l.refid]['doi'] = l.doi
            ref_index[l.refid]['article'] = []
            ref_index[l.refid]['SubCommID'] = -1
            ref_index[l.refid]['modularity'] = -1
        ref_index[l.refid]['article'].append(l.id)
        art_table[l.id].append(l.refid)
        if l.refid not in nA:
            nA[l.refid] = 1
        else:
            nA[l.refid] += 1
    nb_refs = len(nA)

    if verbose: print "....detect common articles"
    for foo in art_table:
        if (len(art_table[foo]) > 1):
            for i in art_table[foo]:
                for j in art_table[foo]:
                    if (i < j):
                        if i not in CC_table: CC_table[i] = dict()
                        if j not in CC_table[i]: CC_table[i][j] = 0
                        CC_table[i][j] += 1

    # choose threshold
    confirm = 'n'
    thr = 5

    while confirm != 'y':
        if thr == 1:
            print "Keep CC links between references sharing at least %d article" % (
                thr)
        else:
            print "Keep CC links between references sharing at least %d articles" % (
                thr)
        confirm = raw_input("Confirm (y/n): ")
        while confirm not in ['n', 'y']:
            confirm = raw_input("...typing error!\n Confirm (y/n): ")
        if confirm == 'n':
            thr = input(
                "threshold for CC links -- references should share at least ? articles:"
            )

    ccthr = thr
    confirm = 'n'
    ref_journal_list = [
        'J OPER MANA IN PRESS', 'J OPER MANAG', 'J OPER MANAG FORTHCO',
        'J OPERATIONS MANAGE', 'J OPERATIONS MANAGEM', 'J. Oper. Manag.',
        'Journal of Operations Management', 'M&SOM-MANUF SERV OP',
        'MANUF SERV IN PRESS', 'MANUF SERV OPER MANA', 'MANUF SERV OPERAT MA',
        'MANUF SERVICE OPERAT',
        'Manufacturing & Service Operations Management',
        'MANUFACTURING SERVIC', 'PROD OPER M IN PRESS', 'PROD OPER MANAG',
        'PROD OPERAT MANAGEM', 'Production and Operations Management',
        'PRODUCTION OPER MANA', 'Production Oper. Management',
        'PRODUCTION OPERATION', 'PRODUCTIONS OPERATIO'
    ]
    ref_journal_flag = False
    print "Do you want the journal of references belong to the list below?"
    for foo in ref_journal_list:
        print foo
    confirm = raw_input("Confirm (y/n): ")
    while confirm not in ['n', 'y']:
        confirm = raw_input("...typing error!\n Confirm (y/n): ")
    if confirm == 'y':
        ref_journal_flag = True

    ##############################
    ## CC COMMUNITIES
    if verbose: print "..CC communities"
    #... define CC network
    if verbose: print "....define graph in networkx format"
    G = nx.Graph()
    for i in CC_table:
        for j in CC_table[i]:
            if ((not ref_journal_flag) or
                (ref_journal_flag and ref_index[i]['journal']
                 in ref_journal_list and ref_index[j]['journal']
                 in ref_journal_list)) and (CC_table[i][j] >= thr):
                w_ij = (1.0 * CC_table[i][j]) / math.sqrt(nA[i] * nA[j])
                G.add_edge(i, j, weight=w_ij)
    nx.draw_spring(G)
    dst = os.path.join(
        out_dir, 'CC-Network(ccthr=%d, thr=%d, ref_journal_flag=%s).png' %
        (ccthr, thr, ref_journal_flag))
    plt.savefig(dst)
    plt.close('all')

    #...
    if verbose: print "....computing communities with Louvain algo"
    dendogram = community.generate_dendogram(G, part_init=None)

    #... output infos
    print "....There are %d references in the database (contain duplicates)" % (
        nb_total_refs)
    print "....There are %d references in the database (contain no duplicate)" % (
        nb_refs)
    print "....There are %d references in the CC network\n......(ie sharing at least %d article(s) with another reference)" % (
        len(G.nodes()), ccthr)
    for level in range(len(dendogram)):
        part = community.partition_at_level(dendogram, level)
        mod = community.modularity(part, G)
        nb_comm = len(set(part.values()))
        size_sup10 = 0
        size_sup100 = 0
        #communities_caracteristics(partition, thr, level)
        for com in set(part.values()):
            list_nodes = [nodes for nodes in part.keys() if part[nodes] == com]
            if len(list_nodes) > 100: size_sup100 += 1
            if len(list_nodes) > 10: size_sup10 += 1
        print "....level %d: %d communities [%d with size > 10, %d with size > 100], modularity Q=%1.6f" % (
            level, nb_comm, size_sup10, size_sup100, mod)

    ##############################
    ## WHICH EXTRACTION ?
    print "..CC communities extraction"
    #
    confirm = 'n'
    level = len(dendogram) - 1
    thr = 0
    while confirm != 'y':
        part = community.partition_at_level(dendogram, level)
        nb_comm = len(set(part.values()))
        size_sup_thr = 0
        n_sup_thr = 0
        for com in set(part.values()):
            list_nodes = [nodes for nodes in part.keys() if part[nodes] == com]
            if len(list_nodes) > thr:
                n_sup_thr += len(list_nodes)
                size_sup_thr += 1
        print "....Extraction of level %d CC communities with size > %d\n......(%d articles gathered in %d communities):" % (
            level, thr, n_sup_thr, size_sup_thr)
        confirm = raw_input("....do you confirm? (y/n): ")
        if confirm == 'n':
            level = input("......level you want to extract:")
            thr = input("......keep communities of size > to:")

    #... partition
    partition = community.partition_at_level(dendogram, level)
    list_nodes = dict()
    for com in set(partition.values()):
        list_nodes[com] = [
            nodes for nodes in partition.keys() if partition[nodes] == com
        ]

    #############################
    # sub-community partition
    subcomm = dict()
    for com in list_nodes:
        # plot SubGraph for each community
        if verbose: print "....plot SubGraph for community %d" % (com)
        subG = nx.subgraph(G, list_nodes[com])
        nx.draw_spring(subG)
        dst = os.path.join(out_dir, 'SubGraph/Plot/SubGraph-%d.png' % (com))
        plt.savefig(dst)
        plt.close('all')
        # partition
        if verbose: print "....sub clustering for community %d" % (com)
        part = community.best_partition(subG)
        # basic descriptive statistics
        comm_size = len(subG.nodes())
        nb_comm = len(set(part.values()))
        subcomm[com] = dict()
        subcomm[com]['nb_comm'] = nb_comm
        subcomm[com]['size'] = comm_size
        mod = community.modularity(part, subG)
        # record each node's sub community id
        for refid in part.keys():
            ref_index[refid]['SubCommID'] = part[refid]
            ref_index[refid]['modularity'] = mod
        if verbose:
            print "......comm_size:%d, nb_comm:%d, modularity:%1.6f" % (
                comm_size, nb_comm, mod)
        # output gephi files
        if verbose:
            print "......generate gephi files for sub-community %d" % (com)
        name = "SubGraph/Gephi/SubCCnetwork%d(ccthr=%d, thr=%d, ref_journal_flag=%s).gdf" % (
            com, ccthr, thr, ref_journal_flag)
        dst = os.path.join(out_dir, name)
        f_gephi = open(dst, 'w')
        # nodes
        f_gephi.write(
            "nodedef>name VARCHAR,label VARCHAR,CCcom VARCHAR, Sub CCcom VARCHAR, Modularity VARCHAR, firstAU VARCHAR,journal VARCHAR,year VARCHAR,nb_arts DOUBLE,doi VARCHAR, volume VARCHAR, page VARCHAR\n"
        )
        for refid in part.keys():
            foo = ref_index[refid]['firstAU'] + ', ' + ref_index[refid][
                'journal'] + ', ' + str(ref_index[refid]['year'])
            f_gephi.write(
                "%d,'%s',%s,%s,%1.6f,%s,%s,%d,%d,%s,%s,%s\n" %
                (refid, foo, str(com), str(ref_index[refid]['SubCommID']),
                 ref_index[refid]['modularity'], ref_index[refid]['firstAU'],
                 ref_index[refid]['journal'], ref_index[refid]['year'],
                 nA[refid], ref_index[refid]['doi'],
                 ref_index[refid]['volume'], ref_index[refid]['page']))
        # edges
        f_gephi.write(
            "edgedef>node1 VARCHAR,node2 VARCHAR,weight DOUBLE,nb_comm_refs DOUBLE"
        )
        for i in part.keys():
            for j in part.keys():
                if (i < j):
                    if i in CC_table:
                        if j in CC_table[i]:
                            w_ij = (1.0 * CC_table[i][j]) / math.sqrt(
                                nA[i] * nA[j])
                            f_gephi.write("\n%d,%d,%f,%d" %
                                          (i, j, w_ij, CC_table[i][j]))
        # end
        f_gephi.close()

    #.. community sizes, sorted via cmpval
    comm_size = dict()
    for com in list_nodes:
        comm_size[com] = len(list_nodes[com])
    Lcomm_size = comm_size.items()
    Lcomm_size.sort(cmpval)

    ##############################
    # Research Base CSV files
    if verbose: print "..Research Base CSV files generating"
    filename = os.path.join(out_dir, "ResearchBase.dat")
    f_out = open(filename, "w")
    # header line
    f_out.write(
        "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" %
        ('CommunityID', 'SubCommunityID', 'Modularity', 'Topic', 'SubTopic',
         'RefID', 'Volume', 'Page', 'Label', 'Title', 'Keywords', 'firstAU',
         'Journal', 'Year', 'Citation', 'DOI'))
    for elm in Lcomm_size:
        com = elm[0]
        for ref in list_nodes[com]:
            foo = ref_index[ref]['firstAU'] + ', ' + ref_index[ref][
                'journal'] + ', ' + str(ref_index[ref]['year'])
            f_out.write(
                "%s\t%s\t%1.6f\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n"
                % (str(com), str(ref_index[ref]['SubCommID']),
                   ref_index[ref]['modularity'], '', '', str(ref),
                   str(ref_index[ref]['volume']), str(ref_index[ref]['page']),
                   foo, '', '', ref_index[ref]['firstAU'],
                   ref_index[ref]['journal'], str(ref_index[ref]['year']),
                   str(nA[ref]), ref_index[ref]['doi']))
    f_out.close()
    if verbose: print "..Done!\n"

    ##############################
    # Research Front CSV files
    if verbose: print "..Research Front CSV files generating"
    filename = os.path.join(out_dir, "ResearchFront.dat")
    f_out = open(filename, "w")
    # header line
    f_out.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" %
                ('CommunityID', 'SubCommunityID', 'RefID', 'DocID', 'DocLabel',
                 'Title', 'Year', 'Citation', 'DE-Keywords', 'ID-Keywords',
                 'Abstract'))
    for elm in Lcomm_size:
        com = elm[0]
        for ref in list_nodes[com]:
            for doc in ref_index[ref]['article']:
                foo = doc_table[doc]['firstAU'] + ', ' + doc_table[doc][
                    'journal'] + ', ' + str(doc_table[doc]['year'])
                f_out.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" %
                            (str(com), str(ref_index[ref]['SubCommID']),
                             str(ref), str(doc), foo, doc_table[doc]['title'],
                             str(doc_table[doc]['year']),
                             str(doc_table[doc]['citation']),
                             doc_table[doc]['de_keywords'],
                             doc_table[doc]['id_keywords'],
                             doc_table[doc]['abstract']))
    f_out.close()
    if verbose: print "..Done!\n"

    #############################
    # OUTPUT GEPHI FILES

    #... output gephi
    # if verbose: print "..Preparing gephi gdf file for CC communities network"

    # ... ini
    # name = "CC_comm_level%d(ccthr=%d, thr=%d, ref_journal_flag=%s).gdf" % (level,ccthr,thr,ref_journal_flag)
    # dst = os.path.join(out_dir, name)
    # f_gephi = open(dst,'w')
    # ... prep nodes
    # if verbose: print "....nodes"
    # f_gephi.write("nodedef>name VARCHAR,label VARCHAR,size DOUBLE,inv_innerweight DOUBLE\n")

    # for com in comm_size:
    # if (comm_size[com] > thr) and (com in comm_label): f_gephi.write("%d,'%s',%d,%1.0f\n" % (com, comm_label[com], comm_size[com], comm_innerw[com]) )
    # ... prep edges
    # if verbose: print "....edges"
    # f_gephi.write("edgedef>node1 VARCHAR,node2 VARCHAR,weight DOUBLE,logweight DOUBLE\n")
    # for com1 in list_nodes:
    # for com2 in list_nodes:
    # size1 = len(list_nodes[com1]); size2 = len(list_nodes[com2]);
    # if size1 > thr and size2 > thr and com1 > com2:
    # W = 0;
    # for id1 in list_nodes[com1]:
    # for id2 in list_nodes[com2]:
    # if id2 in G.edge[id1]:
    # W += G.edge[id1][id2]['weight']
    # W *= 1000.0 / (size1 * size2)
    # if W > 0.000001:
    # f_gephi.write("%d,%d,%1.9f,%1.2f\n" % (com1, com2, W, 6 + math.log(W)/math.log(10)) )
    # ... end
    # f_gephi.close()
    # if verbose: print"..Done!\n"

    ##
    ##

    ##... output the CC networks?
    confirm = raw_input(
        "..There are %d articles in the CC network.\n....do you want to create a gephi file with the CC networks at the articles level? (y/n): "
        % (len(G.nodes())))
    if confirm == 'y':
        ## ... ini
        name = "CCnetwork(ccthr=%d, thr=%d, ref_journal_flag=%s).gdf" % (
            ccthr, thr, ref_journal_flag)
        dst = os.path.join(out_dir, name)
        f_gephi = open(dst, 'w')
        ## ... prep nodes
        if verbose: print "....nodes"
        f_gephi.write(
            "nodedef>name VARCHAR,label VARCHAR,CCcom VARCHAR, Sub CCcom VARCHAR, Modularity VARCHAR, firstAU VARCHAR,journal VARCHAR,year VARCHAR,nb_arts DOUBLE,doi VARCHAR, volume VARCHAR, page VARCHAR\n"
        )
        for refid in ref_index:
            if refid in partition:
                CCcom = partition[refid]
                if comm_size[CCcom] > thr:
                    foo = ref_index[refid]['firstAU'] + ', ' + ref_index[
                        refid]['journal'] + ', ' + str(
                            ref_index[refid]['year'])
                    f_gephi.write(
                        "%d,'%s',%s,%s,%1.6f,%s,%s,%d,%d,%s,%s,%s\n" %
                        (refid, foo, str(CCcom),
                         str(ref_index[refid]['SubCommID']),
                         ref_index[refid]['modularity'],
                         ref_index[refid]['firstAU'],
                         ref_index[refid]['journal'], ref_index[refid]['year'],
                         nA[refid], ref_index[refid]['doi'],
                         ref_index[refid]['volume'], ref_index[refid]['page']))
        ## ... prep edges
        if verbose: print "....edges"
        f_gephi.write(
            "edgedef>node1 VARCHAR,node2 VARCHAR,weight DOUBLE,nb_comm_refs DOUBLE"
        )
        for i in CC_table:
            for j in CC_table[i]:
                if (i < j) and (i in partition) and (j in partition):
                    commi_size = comm_size[partition[i]]
                    commj_size = comm_size[partition[j]]
                    if (commi_size > thr) and (commj_size > thr):
                        w_ij = (1.0 * CC_table[i][j]) / math.sqrt(
                            nA[i] * nA[j])
                        f_gephi.write("\n%d,%d,%f,%d" %
                                      (i, j, w_ij, CC_table[i][j]))
        ## ... end
        f_gephi.close()
        if verbose: print "..Done!\n"

    ##############################
    # Main Community Characteristics file
    type = "main"
    confirm = raw_input(
        "..Do you want to extract the characteristics for main communitise? \n Confirm (y/n):"
    )
    if confirm == 'y':
        label = report.community_characteristics(in_dir, out_dir, type, ccthr,
                                                 thr, ref_journal_flag, G,
                                                 level, partition, list_nodes,
                                                 art_table, verbose)

    ##############################
    # Sub Community Characteristics files
    if verbose: print "..Sub Computing communities caracteristics"
    confirm = raw_input(
        "..Do you want to extract the characteristics for sub communitise? \n Confirm (y/n):"
    )
    if confirm == 'y':
        sub_label = dict()
        for com in list_nodes:
            type = str(com)
            subG = nx.subgraph(G, list_nodes[com])
            level = len(community.generate_dendogram(subG)) - 1
            sub_partition = community.best_partition(subG)
            sub_list_nodes = dict()
            for ref in sub_partition:
                sub_comm = sub_partition[ref]
                if sub_comm in sub_list_nodes:
                    sub_list_nodes[sub_comm].append(ref)
                else:
                    sub_list_nodes[sub_comm] = [ref]
            sub_label[com] = report.community_characteristics(
                in_dir, out_dir, type, ccthr, thr, ref_journal_flag, subG,
                level, sub_partition, sub_list_nodes, art_table, verbose,
                label)
    ##############################
    # Community Characteristics PDF generation
    confirm = raw_input(
        "..Do you want to generate the pdf files of characteristics for communitise? \n Confirm (y/n):"
    )
    if confirm == 'y':
        report.latex(os.path.join(out_dir, "Report"), verbose)

    ## ###################################
    ## END
    return
def GenerateDendorgram(graph):
    global dendorgram
    dendorgram = community.generate_dendogram(graph)
 def print_dendrogram(self):
     dendo = community.generate_dendogram(self.G)
     for level in range(len(dendo) - 1):
         print "partition at level", level, "is", community.partition_at_level(
             dendo, level)
print col_name_list

#creating a dictionary with key = 'id' and value = ids in ref_id
mygraph = {}
for row in list(res):
    key = row['id']
    if row['ref_id'] not in (u'', None):
        val = map(int, row['ref_id'].strip().split(";"))
        mygraph[key] = val
    else:
        mygraph[key] = row['ref_id']

#generating the dendrogram
G = nx.from_dict_of_lists(mygraph)
nx.write_adjlist(G, "test.adjlist")
dendo = community.generate_dendogram(G)
# level 3 assumes the dendrogram has at least four levels
comDict = community.partition_at_level(dendo, 3)

resultDict = {}
#Calculating -  Dissimilarity Matrix
#for all pair of vertices find the dissimilarity matrix
for (key1, val1), (key2, val2) in combinations(mygraph.items(), 2):
    #vertices common to both key1 and key2 are the intersection
    #of their adjacency lists
    neighbours = [val for val in val1 if val in val2]
    dissimilarIndx = 0
    for a, b in combinations(neighbours, 2):
        if comDict[a] != comDict[b]:
            dissimilarIndx = dissimilarIndx + 1
 def __GenerateDendorgram(self):
     global dendorgram
     dendorgram = community.generate_dendogram(self.m_graph)
Example #25
print("Computing modularities...")
print(">> max-core communities: {}".format(len(max_core_communities)))
modularity_max_core = cm.modularity(G, max_core_communities)
print(">> max-core modularity: {}".format(modularity_max_core))
print(">> 4-core communities: {}".format(len(four_cores_communities)))
modularity_four_core = cm.modularity(G, four_cores_communities)
print(">> 4-core modularity: {}".format(modularity_four_core))

print("Computing wcc...")
wcc_max_core = cm.wcc(max_core_communities[0], G)
print(">> max-core wcc: {}".format(wcc_max_core))
wcc_four_core = cm.wcc(four_cores_communities[0], G)
print(">> 4-core wcc: {} size: {}".format(wcc_four_core, four_cores_communities[0].number_of_nodes()))

print("Computing louvain...")
dendo = generate_dendogram(G)
def uniq(lst):
    return len(set(lst))
louvain_steps = [uniq(prt.values()) for prt in dendo]
print(">> Louvain Steps:", louvain_steps)

print("Saving to {}".format(OUT_FILE))
RESULT = {
    "max_core": max_core,
    "num_4-cores": num_4_core,
    "modularity_max-cores": modularity_max_core,
    "modularity_4-cores": modularity_four_core,
    "wcc_max-cores": wcc_max_core,
    "wcc_4-cores": wcc_four_core,
    "louvain_steps": louvain_steps
}
for com in set(parts.values()) :
    count = count + 1.
    list_nodes = [nodes for nodes in parts.keys()
                                if parts[nodes] == com]
    nx.draw_networkx_nodes(G_fb, spring_pos, list_nodes, node_size = 15,
                                node_color = str(count / size))
nx.draw_networkx_edges(G_fb,spring_pos, alpha=0.5)
plt.show()





##### Dendrogram ######

dendo = community.generate_dendogram(G_fb)
for level in range(len(dendo) - 1) :
   print "partition at level", level, "is", community.partition_at_level(dendo, level)



##### induced graph ####

G=community.induced_graph(parts, G_fb)
#nx.draw_networkx(G, pos = spring_pos, cmap = plt.get_cmap("jet"), node_color = values, node_size = 15, with_labels = False)
nx.draw_networkx(G)




Example #27
	G=nx.Graph()
	for i in CC_table:
		for j in CC_table[i]:
			if ((not ref_journal_flag) or (ref_journal_flag and ref_index[i]['journal'] in ref_journal_list and ref_index[j]['journal'] in ref_journal_list)) and (CC_table[i][j]>=ccthr):
				w_ij = (1.0 * CC_table[i][j]) / math.sqrt(nA[i] * nA[j])
				G.add_edge(i, j, weight=w_ij)
	
	#... calculate basic centrality for each node
	if verbose: print "..calculate basic centrality for each node"
	degree = nx.degree_centrality(G)
	closeness = nx.closeness_centrality(G)
	betweenness =  nx.betweenness_centrality(G)
	
	#...
	if verbose: print "....computing communities with Louvain algo"
	dendogram = community.generate_dendogram(G, part_init=None)

	#... output infos
	print "....There are %d references in the database (contain duplicates)" % (nb_total_refs)
	print "....There are %d references in the database (contain no duplicate)" % (nb_refs)
	print "....There are %d references in the given journals (contain no duplicate)" % (nb_given_journals_refs)
	print "....There are %d references in the CC network\n......(ie sharing at least %d article(s) with another reference)" % (len(G.nodes()), ccthr)
	for level in range(len(dendogram)):
		part = community.partition_at_level(dendogram, level)
		mod = community.modularity(part, G)
		nb_comm = len(set(part.values()))
		size_sup10 = 0; size_sup100 = 0;  #communities_caracteristics(partition, thr, level)
		for com in set(part.values()) :
			list_nodes = [nodes for nodes in part.keys() if part[nodes] == com]
			if len(list_nodes) > 100: size_sup100 += 1
			if len(list_nodes) > 10: size_sup10 += 1