Example no. 1
def cluster_sp():
    similaritymatrix = get_matrix("datasets/similaritymatrix.csv")
    graph = add_graph(similaritymatrix, 1000)
    dendo = get_dendrogram(graph)
    bestpartition = best_partition(graph)
    result = dictionaries_to_dataframe(bestpartition, dendo[0], 'SP ID',
                                       'State', 'City')
    result.to_csv('datasets/result.csv', sep=',', index=False)
    cities_graph = community.induced_graph(dendo[0],
                                           graph).edges(data='weight')
    list_to_csv(cities_graph, 'cities_graph.csv')
    states_graph = community.induced_graph(bestpartition,
                                           graph).edges(data='weight')
    list_to_csv(states_graph, 'states_graph.csv')
Example no. 2
def makeCommunityInducedGraph(graph, partition, weight=True):
    """
    Get the community-induced graph of the partition.
    """
    if weight:
        induced_subgraph = community.induced_graph(partition, graph, weight='weight')
    else:
        induced_subgraph = community.induced_graph(partition, graph, weight=None)
    return induced_subgraph
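A minimal usage sketch for the wrapper above (hypothetical driver code; assumes networkx and the python-louvain package, imported as community, whose best_partition supplies the node-to-community dict):

import networkx as nx
import community

g = nx.karate_club_graph()
partition = community.best_partition(g)  # maps node -> community id
induced = makeCommunityInducedGraph(g, partition)
print(induced.number_of_nodes(), induced.number_of_edges())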
Example no. 3
    def test_weight(self):
        """
        Test that total edge weight does not change
        """
        graph = nx.erdos_renyi_graph(50, 0.1)
        part = dict([])
        for node in graph.nodes():
            part[node] = node % 5
        self.assertEqual(graph.size(weight='weight'),
                         co.induced_graph(part, graph).size(weight='weight'))

        for e1, e2 in graph.edges():
            graph[e1][e2]["test_weight"] = 2.

        self.assertEqual(graph.size(weight='test_weight'),
                         co.induced_graph(part, graph, "test_weight").size(weight='test_weight'))
Example no. 4
def plot_community_graph(graph,
                         partition,
                         figsize=(8, 8),
                         node_size=200,
                         plot_overlaps=False,
                         plot_labels=False,
                         cmap=None,
                         top_k=None,
                         min_size=None):
    """
    Plot a community graph with node color coding for communities.

    :param graph: NetworkX/igraph graph
    :param partition: NodeClustering object
    :param figsize: the figure size; it is a pair of float, default (8, 8)
    :param node_size: int, default 200
    :param plot_overlaps: bool, default False. Flag to control whether multiple community memberships are plotted.
    :param plot_labels: bool, default False. Flag to control whether node labels are plotted.
    :param cmap: str or Matplotlib colormap. Colormap for mapping node intensities; if None, the default colormap is used.
    :param top_k: int. Show only the top K influential communities; zero or a negative value means all.
    :param min_size: int. Exclude communities below the specified minimum size.

    Example:

    >>> from cdlib import algorithms, viz
    >>> import networkx as nx
    >>> g = nx.karate_club_graph()
    >>> coms = algorithms.louvain(g)
    >>> viz.plot_community_graph(g, coms)
    """

    cms = __filter(partition.communities, top_k, min_size)

    node_to_com = {}
    for cid, com in enumerate(cms):
        for node in com:
            if node not in node_to_com:
                node_to_com[node] = cid
            else:
                # duplicating overlapped node
                alias = "%s_%s" % (node, cid)
                node_to_com[alias] = cid
                edges = [(alias, y) for y in graph.neighbors(node)]
                graph.add_edges_from(edges)

    # handling partial coverage
    s = nx.subgraph(graph, node_to_com.keys())

    # community graph construction
    c_graph = induced_graph(node_to_com, s)
    node_cms = [[node] for node in c_graph.nodes()]

    return plot_network_clusters(c_graph,
                                 NodeClustering(node_cms, None, ""),
                                 nx.spring_layout(c_graph),
                                 figsize=figsize,
                                 node_size=node_size,
                                 plot_overlaps=plot_overlaps,
                                 plot_labels=plot_labels,
                                 cmap=cmap)
Example no. 5
def external_ec_coarsening(graph, sfdp_path, coarsening_scheme=2, c_type='original'):
    if c_type == 'louvain':
        print("Coarsening with Louvain")
        matrix = magicgraph.to_adjacency_matrix(graph)
        nx_graph = nx.from_scipy_sparse_matrix(matrix)
        dendro = community.generate_dendrogram(nx_graph)

        coarse_graphs = [DoubleWeightedDiGraph(graph)]
        merges = []
        i = 0
        for l in range(len(dendro)):
            level = community.partition_at_level(dendro, l)
            induced = community.induced_graph(level, nx_graph)
            filename = 'induced'+str(l)+'.edgelist'
            #nx.write_edgelist(induced, filename)
            # write weighted graph to file
            with open(filename, 'w') as f:
                for u, v, a in induced.edges.data('weight', default=1):
                    f.write(' '.join([str(u), str(v), str(a)]) + '\n')
            m_graph = magicgraph.load_weighted_edgelist(filename, undirected = True)
            coarse_graphs.append(DoubleWeightedDiGraph(m_graph))
            merges.append(level)
            print('Level:', i, 'N nodes:', m_graph.number_of_nodes())
            i += 1

        return coarse_graphs, merges
    elif c_type == 'original':
        return original_coarsening(graph, sfdp_path, coarsening_scheme)
Example no. 6
def calc_louvain(adj_matrix, level=0, return_c_graph=False):
    nx_G = nx.from_numpy_matrix(adj_matrix)
    dendro = louvain.generate_dendrogram(
        nx_G, randomize=False)  # Maybe set randomize to True

    if len(dendro) - level - 1 < 0:
        raise Exception("The given Level is too deep. The maximum is: " +
                        str(len(dendro) - 1))

    communities = louvain.partition_at_level(dendro, len(dendro) - level - 1)
    number_communities = max(communities.values()) + 1

    # Maybe unnecessary after some code rework and unification
    community_list = []
    for i in range(number_communities):
        grp_list = [grp for grp in communities if communities[grp] == i]
        if grp_list:
            community_list.append(grp_list)

    community_level_G = louvain.induced_graph(communities, nx_G)

    if return_c_graph:
        c_level_graph = nx.adjacency_matrix(community_level_G)
    else:
        c_level_graph = None

    return community_list, c_level_graph
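A hypothetical driver for calc_louvain above: two triangles joined by a bridge, requested at the coarsest level. The exact split depends on the Louvain run, so the printed grouping is only indicative (assumes numpy plus the networkx/louvain imports used in the example):

import numpy as np

adj = np.zeros((6, 6))
for a, b in [(0, 1), (1, 2), (0, 2), (3, 4), (4, 5), (3, 5), (2, 3)]:
    adj[a, b] = adj[b, a] = 1.0

communities, c_graph = calc_louvain(adj, level=0, return_c_graph=True)
print(communities)        # e.g. [[0, 1, 2], [3, 4, 5]]
print(c_graph.todense())  # weighted adjacency of the community graph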
Example no. 7
    def test_weight(self):
        """
        Test that total edge weight does not change
        """
        graph = nx.erdos_renyi_graph(50, 0.1)
        part = dict([])
        for node in graph.nodes():
            part[node] = node % 5
        self.assertEqual(graph.size(weight='weight'),
                         co.induced_graph(part, graph).size(weight='weight'))

        for src, dst in graph.edges():
            graph[src][dst]["test_weight"] = 2.

        induced = co.induced_graph(part, graph, "test_weight")
        self.assertEqual(graph.size(weight='test_weight'),
                         induced.size(weight='test_weight'))
Example no. 8
 def test_nodes(self):
     """
     Test that result nodes are the communities
     """
     g = nx.erdos_renyi_graph(50, 0.1)
     part = dict([])
     for node in g.nodes():
         part[node] = node % 5
     self.assertSetEqual(set(part.values()), set(co.induced_graph(part, g).nodes()))
Example no. 9
 def test_weight(self):
     """
     Test that total edge weight does not change
     """
     g = nx.erdos_renyi_graph(50, 0.1)
     part = dict([])
     for node in g.nodes():
         part[node] = node % 5
     self.assertEqual(g.size(weight='weight'), co.induced_graph(part, g).size(weight='weight'))
Example no. 10
    def test_weight(self):
        """
        Test that total edge weight does not change
        """
        graph = nx.erdos_renyi_graph(50, 0.1)
        part = dict([])
        for node in graph.nodes():
            part[node] = node % 5
        self.assertEqual(graph.size(weight='weight'),
                         co.induced_graph(part, graph).size(weight='weight'))

        for e1, e2 in graph.edges():
            graph[e1][e2]["test_weight"] = 2.

        self.assertEqual(
            graph.size(weight='test_weight'),
            co.induced_graph(part, graph,
                             "test_weight").size(weight='test_weight'))
Example no. 11
def ppl(diary):

    # make people-event DataFrame
    ppl_evt = diary[['Event', 'Participants']].dropna(subset=['Participants'])

    evtXie = pd.DataFrame({
        "Event": ppl_evt['Event'].unique(),
        "Participants": "謝蘭生"
    })
    ppl_evt = ppl_evt.append(evtXie, ignore_index=True)

    # make edgelist from people-event DF
    el = ppl_evt.merge(ppl_evt, on="Event")
    el = el.drop("Event", axis=1).rename(columns={
        "Participants_x": "Source",
        "Participants_y": "Target"
    })
    el = el.query("Source<Target")
    el["Weight"] = 1

    # Calculate Weight of edges with Groupby
    edgelist = el.groupby(["Source", "Target"]).sum().reset_index()

    # export edges into a graphml file
    G = nx.from_pandas_dataframe(edgelist,
                                 source="Source",
                                 target="Target",
                                 edge_attr="Weight")
    nx.set_node_attributes(G, "k-core", nx.core_number(G))
    communityDict = community.best_partition(G)
    nx.set_node_attributes(G, "community", communityDict)
    nx.set_node_attributes(
        G, "betweenness",
        nx.betweenness_centrality(G, 850, normalized=True, weight="Weight"))
    nx.write_graphml(G, "Graph/Network/ppl.graphml", encoding="utf-8")

    # export a graph of the relationship between communities
    G_commun = community.induced_graph(communityDict, G)
    nx.write_graphml(G_commun,
                     "Graph/Network/pplCommunity.graphml",
                     encoding="utf-8")

    # export nodes with attributes into a csv
    idx, attr = zip(*G.nodes(data=True))
    core = [d['k-core'] for d in attr]
    commun = [d['community'] for d in attr]
    betwn = [d['betweenness'] for d in attr]
    nodes_attr = pd.DataFrame(
        {
            'k-core': core,
            'community': commun,
            'betweenness centrality': betwn
        },
        index=idx)
    nodes_attr.to_csv("csv/pplCoreCommunity.csv", encoding='utf-8')

    return
Example no. 12
 def test_unique(self):
     """
     Test that the induced graph is the same when all nodes are alone
     """
     graph = nx.erdos_renyi_graph(50, 0.1)
     part = dict([])
     for node in graph.nodes():
         part[node] = node
     ind = co.induced_graph(part, graph)
     self.assertTrue(nx.is_isomorphic(graph, ind))
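The property checked by test_unique can also be seen on a tiny example (a sketch, assuming the same python-louvain import as co used in the test):

import networkx as nx
import community as co

g = nx.path_graph(4)
# identity partition: every node is its own community
ind = co.induced_graph({n: n for n in g.nodes()}, g)
print(nx.is_isomorphic(g, ind))  # True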
Example no. 13
 def test_weight(self):
     """
     Test that total edge weight does not change
     """
     g = nx.erdos_renyi_graph(50, 0.1)
     part = dict([])
     for node in g.nodes():
         part[node] = node % 5
     self.assertEqual(g.size(weight='weight'),
                      co.induced_graph(part, g).size(weight='weight'))
Example no. 14
 def test_uniq(self):
     """
     Test that the induced graph is the same when all nodes are alone
     """
     g = nx.erdos_renyi_graph(50, 0.1)
     part = dict([])
     for node in g.nodes():
         part[node] = node
     ind = co.induced_graph(part, g)
     self.assertTrue(nx.is_isomorphic(g, ind))
Example no. 15
 def test_nodes(self):
     """
     Test that result nodes are the communities
     """
     graph = nx.erdos_renyi_graph(50, 0.1)
     part = dict([])
     for node in graph.nodes():
         part[node] = node % 5
     self.assertSetEqual(set(part.values()),
                         set(co.induced_graph(part, graph).nodes()))
Example no. 16
def plot_community_graph(graph,
                         partition,
                         figsize=(8, 8),
                         node_size=200,
                         plot_overlaps=False,
                         plot_labels=False):
    """
        Plot a community graph with node color coding for communities.

        :param graph: NetworkX/igraph graph
        :param partition: NodeClustering object
        :param figsize: the figure size; it is a pair of float, default (8, 8)
        :param node_size: int, default 200
        :param plot_overlaps: bool, default False. Flag to control whether multiple community memberships are plotted.
        :param plot_labels: bool, default False. Flag to control whether node labels are plotted.

        Example:

        >>> from cdlib import algorithms, viz
        >>> import networkx as nx
        >>> g = nx.karate_club_graph()
        >>> coms = algorithms.louvain(g)
        >>> viz.plot_community_graph(g, coms)
        """

    cms = partition.communities

    node_to_com = {}
    for cid, com in enumerate(cms):
        for node in com:
            if node not in node_to_com:
                node_to_com[node] = cid
            else:
                # duplicating overlapped node
                alias = "%s_%s" % (node, cid)
                node_to_com[alias] = cid
                edges = [(alias, y) for y in graph.neighbors(node)]
                graph.add_edges_from(edges)

    # handling partial coverage
    s = nx.subgraph(graph, node_to_com.keys())

    # community graph construction
    c_graph = induced_graph(node_to_com, s)
    node_cms = [[node] for node in c_graph.nodes()]

    return plot_network_clusters(c_graph,
                                 NodeClustering(node_cms, None, ""),
                                 nx.spring_layout(c_graph),
                                 figsize=figsize,
                                 node_size=node_size,
                                 plot_overlaps=plot_overlaps,
                                 plot_labels=plot_labels)
Example no. 17
def setup(g, num_players, num_seeds):
    #first compute the best partition
    partition = community.best_partition(g)
    induced_graph = community.induced_graph(partition, g)

    # Play around with picking the "best" community
    # node boundary?
    #print nx.current_flow_closeness_centrality(induced_graph) # not better
    # print nx.katz_centrality(induced_graph) # doesn't converge
    #print nx.eigenvector_centrality(induced_graph) # not as good
    #print nx.communicability_centrality(induced_graph) # not as good
    #{0: 8.451771641899612, 1: 9.041654401534407, 2: 9.321830560246685, 3: 8.79634625159723, 4: 7.512000387517644, 5: 9.319261339431147, 6: 8.635502364748598, 7: 9.182167514276696, 8: 8.812816793986622, 9: 5.955242238035001, 10: 7.224124906314186, 11: 8.598864555204745, 12: 1.3780813983087927, 13: 8.574141188778002, 14: 1.4894068385674029}
    #{0: 0.03170498456257798, 1: 0.03351885293616147, 2: 0.982004394865475, 3: 0.009750044520081363, 4: 0.012642119637055598, 5: 0.08211419419246402, 6: 0.013202397926046897, 7: 0.15814666928657686, 8: 0.026268239793024895, 9: 0.0005523351650465954, 10: 0.0009839216844465231, 11: 0.019821817113884598, 12: 4.399697547690089e-05, 13: 0.016495461620553098, 14: 0.00022120782918811697}
    #{0: 1670.2226290285078, 1: 3648.298186716118, 2: 4153.05229512053, 3: 3214.282455755265, 4: 561.0349179323383, 5: 4068.320908838754, 6: 2977.2760610270666, 7: 3474.488922208751, 8: 3493.8811964338947, 9: 1521.5720949300896, 10: 2520.2823105797784, 11: 1385.0884502097147, 12: 281.6674672972596, 13: 2306.8136315883607, 14: 358.98207498678886}


    # viewer.draw_graph(induced_graph)
    # try:
    #     plt.show()
    # except:
    #     plt.hide()

    # Choose the community with the most number of outgoing edges
    #weights = nx.communicability_centrality(induced_graph) #weight='weight'
    weights = nx.degree(induced_graph, weight='weight')
    #print weights

    best_com = max(weights, key=weights.__getitem__)

    com = defaultdict(list)
    for node, c in partition.items():
        com[c].append(node)

    selected_comm = g.subgraph(com[best_com])

    # get one node from every clique
    #print selected_comm.number_of_nodes()
    # max_size_clique = nx.graph_clique_number(selected_comm)
    # print max_size_clique
    # lst = []
    # for cl in nx.find_cliques(selected_comm):
    #     if len(cl) >= max_size_clique/2:
    #         lst.append(r.choice(cl))
    #         #return cl
    #         #print len(cl), cl
    # return lst
    #print nx.find_cliques(selected_comm)

    #setup = largest_clique.setup(selected_comm, num_players, num_seeds)
    #return setup

    return strat.setup(selected_comm, num_players, num_seeds)
Example no. 18
 def test_clique(self):
     """
     Test that a complete graph of size 2*n has the right behavior when split in two
     """
     n = 5
     g = nx.complete_graph(2 * n)
     part = dict([])
     for node in g.nodes():
         part[node] = node % 2
     ind = co.induced_graph(part, g)
     goal = nx.Graph()
     goal.add_weighted_edges_from([(0, 1, n * n), (0, 0, n * (n - 1) / 2), (1, 1, n * (n - 1) / 2)])
     self.assertTrue(nx.is_isomorphic(ind, goal))
Example no. 19
 def test_clique(self):
     """
     Test that a complete graph of size 2*n has the right behavior when split in two
     """
     n = 5
     g = nx.complete_graph(2 * n)
     part = dict([])
     for node in g.nodes():
         part[node] = node % 2
     ind = co.induced_graph(part, g)
     goal = nx.Graph()
     goal.add_weighted_edges_from([(0, 1, n * n), (0, 0, n * (n - 1) / 2),
                                   (1, 1, n * (n - 1) / 2)])
     self.assertTrue(nx.is_isomorphic(ind, goal))
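A worked check of the weights asserted above for n = 5: splitting K_10 by node parity yields n*n = 25 cross-community edges, and n*(n-1)/2 = 10 edges inside each half, which induced_graph stores as self-loop weights (a sketch, assuming python-louvain imported as co):

import networkx as nx
import community as co

n = 5
g = nx.complete_graph(2 * n)
ind = co.induced_graph({node: node % 2 for node in g.nodes()}, g)
print(ind[0][1]["weight"])  # 25: n * n edges between the halves
print(ind[0][0]["weight"])  # 10: n * (n - 1) / 2 edges inside one half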
Example no. 20
def ppl_plc(diary):

    # make people-place DataFrame by merging on event
    evt_plc = diary[['Event', 'Place']].dropna()

    evt_ppl = diary[['Event', 'Participants']]
    Xie = pd.DataFrame({'Event': diary['Event'], 'Participants': '謝蘭生'})
    evt_ppl = evt_ppl.append(Xie, ignore_index=True)

    ppl_plc = pd.merge(evt_plc, evt_ppl, how='left').dropna().drop('Event',
                                                                   axis=1)
    ppl_plc['Weight'] = 1

    # make edgelist
    edges = ppl_plc.groupby(['Place', 'Participants']).sum().reset_index()
    ppl_dict = pd.Series('People',
                         index=edges['Participants'].unique()).to_dict()
    plc_dict = pd.Series('Place', index=edges['Place'].unique()).to_dict()
    type_dict = {**ppl_dict, **plc_dict}
    edges.rename(columns={
        'Participants': 'Source',
        'Place': 'Target'
    },
                 inplace=True)

    # make the people-place network
    G = nx.from_pandas_dataframe(edges,
                                 source="Source",
                                 target="Target",
                                 edge_attr="Weight")
    nx.set_node_attributes(G, 'Type', type_dict)
    nx.set_node_attributes(G, "k-core", nx.core_number(G))
    communityDict = community.best_partition(G)
    nx.set_node_attributes(G, "community", communityDict)
    nx.write_graphml(G, "Graph/Network/ppl_plc.graphml", encoding="utf-8")

    # make community network
    G_commun = community.induced_graph(communityDict, G)
    nx.write_graphml(G_commun,
                     "Graph/Network/ppl_plc_Community.graphml",
                     encoding="utf-8")

    # export nodes with attributes into a csv
    idx, attr = zip(*G.nodes(data=True))
    core = [d['k-core'] for d in attr]
    commun = [d['community'] for d in attr]
    nodes_attr = pd.DataFrame({'k-core': core, 'community': commun}, index=idx)
    nodes_attr.to_csv("csv/ppl_plc_CoreCommunity.csv", encoding='utf-8')

    return
Example no. 21
 def test_clique(self):
     """
     Test that a complete graph of size 2*graph_size has the right behavior
     when split in two
     """
     graph_size = 5
     graph = nx.complete_graph(2 * graph_size)
     part = dict([])
     for node in graph.nodes():
         part[node] = node % 2
     ind = co.induced_graph(part, graph)
     goal = nx.Graph()
     edges = [(0, 1, graph_size**2),
              (0, 0, graph_size * (graph_size - 1) / 2),
              (1, 1, graph_size * (graph_size - 1) / 2)]
     goal.add_weighted_edges_from(edges)
     self.assertTrue(nx.is_isomorphic(ind, goal))
Example no. 22
 def test_clique(self):
     """
     Test that a complete graph of size 2*graph_size has the right behavior
     when split in two
     """
     graph_size = 5
     graph = nx.complete_graph(2 * graph_size)
     part = dict([])
     for node in graph.nodes():
         part[node] = node % 2
     ind = co.induced_graph(part, graph)
     goal = nx.Graph()
     edges = [(0, 1, graph_size ** 2),
              (0, 0, graph_size * (graph_size - 1) / 2),
              (1, 1, graph_size * (graph_size - 1) / 2)]
     goal.add_weighted_edges_from(edges)
     self.assertTrue(nx.is_isomorphic(ind, goal))
Example no. 23
def calc_louvain(adj_matrix, level=0, return_c_graph=False):
    nx_G = nx.from_numpy_array(adj_matrix)
    dendro = louvain.generate_dendrogram(
        nx_G, randomize=False, random_state=0)  #Maybe set randomize True

    level = len(dendro) - level - 1

    if level < 0:
        raise Exception("The given Level is too deep. The maximum is: " +
                        str(len(dendro) - 1))

    communities = louvain.partition_at_level(dendro, level)
    number_communities = max(communities.values()) + 1

    # Maybe unnecessary after some code rework and unification
    community_list = []
    for i in range(number_communities):
        grp_list = [grp for grp in communities if communities[grp] == i]
        if grp_list:
            community_list.append(grp_list)

    community_level_G = louvain.induced_graph(communities, nx_G)

    if return_c_graph:
        c_level_graph = nx.adjacency_matrix(community_level_G)
    else:
        c_level_graph = None

    inv_dendro = []
    for dct in dendro:
        inv_dct = {}
        for k, v in dct.items():
            inv_dct.setdefault(v, []).append(k)
        inv_dendro.append(inv_dct)

    return community_list, c_level_graph, dendro, inv_dendro
Example no. 24
def drawInducedGraph(g, subgraph, partition, algorithm, comm_to_plot, community_d):
	# colors
	comm_to_color_zip = zip(COLORS, comm_to_plot)
	comm_to_color = [color for color, com in comm_to_color_zip]

	fig, ax = plt.subplots()
	# induced graph scoring
	score = community.modularity(partition, subgraph)
	print("Induced Community Score:", score)
	# draw induced graph
	new_er_partition = {node:comm for node, comm in partition.items() if comm in comm_to_plot}
	comm_graph = community.induced_graph(new_er_partition, subgraph)  

	new_partition = {comm:comm for comm in comm_graph.nodes()}
	new_pos = community_layout(comm_graph, new_partition, comm_scale=2000,node_scale=50)	
	weights = [comm_graph[u][v]['weight']/15.0 for u,v in comm_graph.edges()]
	# node_degs = [val*35 for node,val in nx.degree(comm_graph)]
	community_self_loops = [community_d[comm] for comm in comm_graph.nodes()]
	nx.draw(comm_graph, new_pos, cmap=plt.get_cmap('jet'), with_labels=False, arrows=True, node_color=comm_to_color,node_size=community_self_loops,width=weights)
	plt.title("Supernodes from Community Detection Using " + algorithm + " Algorithm \n Modularity Score: " + str(score))
	plt.savefig('new_plots/induced_community_' + algorithm + '_' + LAYOUT_TYPE + '.png')
Example no. 25
def induced_graph(original_graph,
                  partition,
                  induced_graph=None,
                  rescale_node_size=1.,
                  draw=True,
                  cmap=None,
                  words_map_inv=None,
                  pos=None,
                  betweenness_scaled=None):
    """
    Returns the graph induced from the community partition of the graph
    """
    if induced_graph is None:
        induced_graph = community.induced_graph(partition,
                                                original_graph,
                                                weight="weight")

    if draw and cmap:
        if pos is None:
            pos = nx.spring_layout(induced_graph)
        w = induced_graph.degree(weight="weight")

        sizes = [w[node] * rescale_node_size for node in induced_graph.nodes()]
        nx.draw(induced_graph,
                pos=pos,
                node_size=sizes,
                node_color=[cmap[n] for n in induced_graph.nodes()])

        labels = {}
        for com in induced_graph.nodes():
            rep = max([
                nodes for nodes in partition.keys() if partition[nodes] == com
            ],
                      key=lambda n: original_graph.degree(n, weight="weight"))
            labels[com] = words_map_inv[rep]

        nx.draw_networkx_labels(induced_graph, pos, labels, font_size=16)

    return induced_graph
Example no. 26
    def draw_graph(self, output_file=None, npmi_threshold=.7):
        """Draw a graph of used-with connections between packages
        
        @param output_file: file to save graph to; if None, then show graph
        """
        
        G1 = self.G.copy()
        for edge in self.G.edges(data=True):
            if edge[2]["weight"] < npmi_threshold:
                G1.remove_edge(edge[0], edge[1])
        
        G1 = nx.subgraph(G1, [node for node in G1.nodes() if nx.degree(G1)[node] > 1])        
                
        partition = community.best_partition(G1)
        partpos = nx.spring_layout(community.induced_graph(partition, G1), iterations=100)
        
        forced_partpos = { n : partpos[partition[n]] for n in G1.nodes() }
        print("between")
        pos=nx.spring_layout(G1,pos =forced_partpos,iterations=200)  
        plt.clf()
        plt.figure(figsize=(36,36))
        plt.axis("off")
        
        plt.title('usedwith')

        labels = { node : self.names[node] for node in G1.nodes() }
        nx.draw_networkx_edges(G1,pos, edge_color="#cccccc")
        nx.draw_networkx_nodes(G1,pos,node_size=50, node_color=[hashColor(partition[n]) for n in G1.nodes()])
        nx.draw_networkx_labels(G1,pos,labels=labels)
        if output_file is None:
            plt.show()
        else:
            plt.savefig(output_file, bbox_inches='tight')
        
        
Example no. 27
def network_plots(l, partition, out, temp):
    # Projection colored and sized by community
    pos = community_layout(l, partition)
    min_comm_size = get_top_comm_len(partition, 3)
    print('MIN COMM SIZE: ' + str(min_comm_size))
    c = _color_nodes(l, partition, min_comm_size)
    s = _size_nodes(l, partition, 3)
    nx.draw(l, pos, node_color=c, node_size=s, width=0.3, alpha=0.7)
    plt.savefig(out + '02_comms_projection.png')
    plt.close()

    # Induced network of communities
    ind = induced_graph(partition, l)
    node_size = []
    for comm_node in ind.nodes:
        size = temp[temp['community'] == comm_node]['nodecount'].values[0]
        if size == 1:
            node_size.append(0)
        else:
            node_size.append(np.exp(size))

    nx.draw(ind, node_size=node_size, node_color='black', alpha=0.7, width=0.5)
    plt.savefig(out + 'induced_projection.png')
    plt.close()

    pos = nx.spring_layout(l, k=0.50)
    plt.axis('off')
    nx.draw_networkx(l,
                     node_size=7,
                     with_labels=False,
                     node_color=c,
                     edge_color='black',
                     width=0.3,
                     alpha=0.7,
                     pos=pos)
    plt.savefig(out + 'projection.png')
    plt.close()
Example no. 28
def induced_graph_viz(l, partition, partition_df, name):
    partition_df = partition_df.reset_index()
    partition_df = partition_df.groupby([0]).count()
    print(partition_df.head())
    partition_df = partition_df.reset_index()
    print(partition_df.head())
    partition_df.columns = ['community', 'nodecount']
    # Induced network of communities
    ind = induced_graph(partition, l)
    node_size = []
    for comm_node in ind.nodes:
        size = partition_df[partition_df['community'] ==
                            comm_node]['nodecount'].values[0]
        if size == 1:
            node_size.append(0)
        else:
            node_size.append(size**2)

    plt.figure(figsize=(14, 16))
    nx.draw(ind, node_size=node_size, alpha=0.7)
    plt.savefig(name + '_induced_projection.png')
    plt.close()

    return ind
Example no. 29
def induce_graph_by_communities(graph: nx.Graph,
                                communities: Dict[Any, int],
                                weight_attribute: str = 'weight') -> nx.Graph:
    """
    Creates a community graph with nodes from the communities dictionary
    and using the edges of the original graph to form edges between communities.

    Weights are aggregated; you may need to normalize the resulting graph
    after calling this function.

    Note: logs a warning if the size of the community dictionary is less than
    the size of the provided graph's vertex set.

    :param networkx.Graph graph: The original graph that contains the edges that will be
        used to formulate a new induced community graph
    :param communities: The communities dictionary provides a mapping of
        original vertex ID to new community ID.
    :type communities: dict[Any, int]
    :param str weight_attribute: The weight attribute on the original graph's edges to use
        when aggregating the weights of the induced community graph.  Default is `weight`.
    :return: The induced community graph.
    :rtype: networkx.Graph
    :raises ValueError: If the graph is None
    :raises ValueError: If the communities dictionary is None
    """
    logger = logging.getLogger(__name__)
    if graph is None:
        raise ValueError("graph cannot be None")
    if communities is None:
        raise ValueError("communities cannot be None")
    if len(communities) < len(graph.nodes()):
        logger.warning(
            f"Length of communities provided ({len(communities)}) is less than the "
            + f"total number of nodes in the graph ({len(graph.nodes())})")

    return community.induced_graph(communities, graph, weight_attribute)
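A minimal, hypothetical usage sketch for induce_graph_by_communities on a toy weighted graph (the edge list and community ids here are invented for illustration):

import networkx as nx

g = nx.Graph()
g.add_edge("a", "b", weight=2.0)  # inside community 0
g.add_edge("b", "c", weight=1.0)  # bridges communities 0 and 1

community_graph = induce_graph_by_communities(g, {"a": 0, "b": 0, "c": 1})
print(list(community_graph.edges(data="weight")))
# e.g. [(0, 0, 2.0), (0, 1, 1.0)] -- intra-community weight becomes a self-loop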
Example no. 30
def augment_graph_data(data, max_groups):

    total_nodes = len(data['nodes'])

    #lowering the necessary node count
    #since in some cases node count is greatly reduced after processing
    # first author kurtz,m goes from ~60 to 19 for instance

    if total_nodes < 15:
        #just get rid of the sets
        for i, l in enumerate(data["links"]):
            data["links"][i]["overlap"] = list(l["overlap"])

        return {"fullGraph": data}

    #create the networkx graph
    G = nx.Graph()
    for i, x in enumerate(data['nodes']):
        G.add_node(i,
                   node_name=x["nodeName"],
                   nodeWeight=x["nodeWeight"],
                   title=x["title"],
                   citation_count=x["citation_count"],
                   first_author=x["first_author"],
                   read_count=x["read_count"])

    for i, x in enumerate(data['links']):
        G.add_edge(x["source"],
                   x["target"],
                   weight=x["value"],
                   overlap=list(x["overlap"]))

    all_nodes = G.nodes()

    #partition is a dictionary with group names as keys
    # and individual node indexes as values
    partition = community.best_partition(G)

    for g in G.nodes():

        G.node[g]["group"] = partition[g]

    #with new group info, create the summary group graph
    summary_graph = community.induced_graph(partition, G)

    #title container
    titles = {}

    #enhance the information that will be in the json handed off to d3
    for x in summary_graph.nodes():
        summary_graph.node[x]["total_citations"] = sum([
            G.node[paper].get("citation_count", 0) for paper in G.nodes()
            if G.node[paper]["group"] == x
        ])
        summary_graph.node[x]["total_reads"] = sum([
            G.node[paper].get("read_count", 0) for paper in G.nodes()
            if G.node[paper]["group"] == x
        ])
        papers = sorted([
            G.node[paper] for paper in G.nodes() if G.node[paper]["group"] == x
        ],
                        key=lambda x: x.get("nodeWeight", 0),
                        reverse=True)
        titles[x] = [p["title"] for p in papers]
        summary_graph.node[x]["paper_count"] = len(papers)

    #attaching title 'word clouds' to the nodes
    significant_words = tf_idf.get_tf_idf_vals(titles)
    for x in summary_graph.nodes():
        #remove the ones with only 1 paper
        if summary_graph.node[x]["paper_count"] == 1:
            summary_graph.remove_node(x)
        else:
            #otherwise, give them a title
            #how many words should we show on the group? max 6, otherwise 1 per every 2 papers
            summary_graph.node[x]["node_label"] = dict(
                sorted(significant_words[x].items(),
                       key=lambda x: x[1],
                       reverse=True)[:6])

    # remove all but top n groups from summary graph
    # where top n is measured by total citations from a group
    top_nodes = sorted([n for n in summary_graph.nodes(data=True)],
                       key=lambda x: x[1]["total_citations"],
                       reverse=True)[:max_groups]
    top_nodes = [t for t in top_nodes if t[1]["total_citations"] >= 1]
    top_node_ids = [n[0] for n in top_nodes]
    for group_id in summary_graph.nodes():
        if group_id not in top_node_ids:
            summary_graph.remove_node(group_id)

    # remove nodes from full graph that aren't in top group
    # this automatically takes care of edges, too
    for node in G.nodes(data=True):
        if node[1]["group"] not in top_node_ids:
            G.remove_node(node[0])

    # continuing to enhance the information: add to group info about the most common co-references
    for x in summary_graph.nodes():
        #make a float so division later to get a percent makes sense
        num_papers = float(summary_graph.node[x]["paper_count"])
        references = {}
        #find all members of group x
        indexes = [
            paperIndex for paperIndex in G.nodes()
            if G.node[paperIndex]["group"] == x
        ]
        for edge in G.edges(data=True):
            #if it passes, it's an inter-group connection
            # [0] is source, [1] is target, [2] is data dict
            paper_one = edge[0]
            paper_two = edge[1]
            if paper_one in indexes and paper_two in indexes:
                for bib in edge[2]["overlap"]:
                    if bib in references:
                        references[bib].update([paper_one, paper_two])
                    else:
                        references[bib] = set([paper_one, paper_two])

        count_references = sorted(references.items(),
                                  key=lambda x: len(x[1]),
                                  reverse=True)[:5]
        top_common_references = [
            (tup[0], float("{0:.2f}".format(len(tup[1]) / num_papers)))
            for tup in count_references
        ]
        top_common_references = dict(top_common_references)
        summary_graph.node[x]["top_common_references"] = top_common_references

    summary_json = json_graph.node_link_data(summary_graph)

    # giving groups node_names based on size of groups
    for i, n in enumerate(
            sorted(summary_json["nodes"],
                   key=lambda x: x["paper_count"],
                   reverse=True)):
        for possible_real_index, node in enumerate(summary_json["nodes"]):
            if node == n:
                real_index = possible_real_index
        summary_json["nodes"][real_index]["node_name"] = i + 1

    for i, n in enumerate(summary_json["nodes"]):
        #cache this so graph manipulation later is easier
        summary_json["nodes"][i]["stable_index"] = i
        #find the node

    final_data = {
        "summaryGraph": summary_json,
        "fullGraph": json_graph.node_link_data(G)
    }
    return final_data
Example no. 31
#Create network layout for visualizations
spring_pos = nx.spring_layout(G_karate)

plt.axis("off")
nx.draw_networkx(G_karate, pos = spring_pos, with_labels = False, node_size = 80)



############### Community detection ###########
import community as com
parts = com.best_partition(G_karate)
values = [parts.get(node) for node in G_karate.nodes()]



plt.axis("off")
nx.draw_networkx(G_karate, pos = spring_pos, cmap = plt.get_cmap("jet"), node_color = values, font_size=20,node_size = 80, with_labels = False)

## Calculate the modularity ##
com.modularity(parts, G_karate)

## induced graph : each community is represented as one node ##
help(com)
G_induced=com.induced_graph(parts, G_karate)
plt.axis("off")
nx.draw_networkx(G_induced, cmap = plt.get_cmap("jet"),  font_size=20,node_size = 80, with_labels = False)



Example no. 32
import pubmed
from utils import *
import community
import networkx as net
import matplotlib.pyplot as plot

#articles = pubmed.get_articles('compu')
#open('articles.json','wb').write(json.dumps(articles))

articles = json.loads(open('articles.json','rb').read())

#articles2=[a for a in articles if parser.parse(a['DP'].replace('-',' ').split(' ')[0])>datetime.datetime(2000,1,1)]


aunet = pubmed.make_author_network(articles)

## removes single links; now will separate into research groups.
components=net.connected_component_subgraphs(trim_edges(aunet,2))

## separate the rest into communities. Plot the overall structure, individual clusters, macrostructure
community.plot_community(components[0], filename='images/largest_community.pdf')
subgraphs=community.plot_partitions(components[0],filename='images/community')

ind=community.induced_graph(community.best_partition(components[0]),components[0])
net.draw(ind)
plot.savefig('images/macrostructure.pdf')


Example no. 33
# Matrix2 = [[0 for x in range(size2)] for y in range(size2)]
#
#
# for i in range(0, size2):
#     for j in range(0, size2):
#         if j == i or j < i:
#             continue
#
#         for node in clusters[i]:
#             Matrix2[i][j] += helpers.num_edges(node, clusters[j], Matrix)
#             Matrix2[j][i] += helpers.num_edges(node, clusters[j], Matrix)
#
# Matrix3 = helpers.flip_edge_ratio(Matrix2, size2)
# Matrix4 = helpers.edge_ratio(Matrix2, size2)

G2 = community.induced_graph(labels, G)
size2 = G2.number_of_nodes()
Matrix2 = helpers.create_shortest_path_matrix(G2, size2, "weight")
dFrame2 = DataFrame(Matrix2)
dFrame2.to_csv("Matrix_Files/M2.csv")

# save matrix to file for output to dipha
dipha_utils.writeDistBin(Matrix2, infile2)
#os.system("run_dipha.bat")

# nx.draw(G)
# plt.savefig("G.png")

# nx.draw(G2)
# plt.savefig("G2.png")
Example no. 34
import networkx as nx
import community as c
import matplotlib.pyplot as plt

# import the graph
G = nx.read_graphml("test1.graphml")

# find communities
dendo = c.generate_dendrogram(G)
for level in range(len(dendo) - 1):
    print("partition at level", level, "is", c.partition_at_level(dendo, level))

partition = c.best_partition(G)
m = c.modularity(partition, G)
print(m)
agglomerate = c.induced_graph(partition, G)

# draw the graph
colors = ["blue", "green", "yellow", "violet", "black", "orange", "cyan", "red", "indigo", "pink"]

plt.figure(1)
size = float(len(set(partition.values())))
pos = nx.spring_layout(G)
count = 0.

for community in set(partition.values()):
    count = count + 1.
    list_nodes = [nodes for nodes in partition.keys() if partition[nodes] == community]
    nx.draw_networkx_nodes(G, pos, list_nodes, node_size=40, node_color=colors[int(count) % 10])

nx.draw_networkx_edges(G, pos, alpha=0.5)
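The induced graph computed above as agglomerate is never drawn; a possible follow-up sketch, guarding against an edgeless result (one node per community, edge width scaled by aggregated weight):

plt.figure(2)
agg_pos = nx.spring_layout(agglomerate)
edge_weights = [d["weight"] for _, _, d in agglomerate.edges(data=True)]
if edge_weights:
    nx.draw_networkx(agglomerate, agg_pos, node_size=120,
                     width=[4.0 * w / max(edge_weights) for w in edge_weights])
plt.show()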
Example no. 35
    nx.draw_networkx_nodes(G_fb, spring_pos, list_nodes, node_size = 15,
                                node_color = str(count / size))
nx.draw_networkx_edges(G_fb,spring_pos, alpha=0.5)
plt.show()





##### Dendo graph ######

dendo = community.generate_dendrogram(G_fb)
for level in range(len(dendo) - 1):
    print("partition at level", level, "is", community.partition_at_level(dendo, level))



##### induced graph ####

G=community.induced_graph(parts, G_fb)
#nx.draw_networkx(G, pos = spring_pos, cmap = plt.get_cmap("jet"), node_color = values, node_size = 15, with_labels = False)
nx.draw_networkx(G)
Example no. 36
def build_json(hierarchy_dict, h5_data, dataset_name, graph, json, threshold):
    # data set dict
    ds_dict = {}

    # graph dict
    g_dict = {}

    # Maximum hierarchy size
    hmax = len(hierarchy_dict["dendro"]) - 1

    # Add pseudo entry to trigger single node dict creation
    hierarchy_dict[hmax + 1] = {}
    for hidx, hdict in hierarchy_dict.items():
        if not isinstance(hidx, int):
            continue
        # Dendrogram list is sorted inversely to hierarchy dict. Therefore, the dendrogram index has to be recalculated.
        didx = hmax - hidx
        # edge dict
        e_dict = {}
        # node dict
        n_dict = {}
        # hierarchy dict
        h_dict = {}
        if didx > -1:
            # Nodes
            for com, nodes in hierarchy_dict["inv_dendro"][didx].items():
                # attribute dict
                a_dict = {}
                a_dict["index"] = com
                a_dict["name"] = "h%in%i" % (hidx, com)
                a_dict["childs"] = nodes
                a_dict["mzs"] = list(
                    h5_data.columns[hdict["communities"][com]])
                try:
                    a_dict["membership"] = hierarchy_dict["dendro"][didx +
                                                                    1][com]
                except Exception as e:
                    print(e)
                n_dict["h%in%i" % (hidx, com)] = a_dict
        else:
            # single nodes are always first entry in dendro
            for node, com in hierarchy_dict["dendro"][0].items():
                a_dict = {}
                a_dict["index"] = node
                a_dict["name"] = h5_data.columns[node]
                a_dict["membership"] = com
                a_dict["mzs"] = [h5_data.columns[node]]
                n_dict["h%in%i" % (hidx, node)] = a_dict
        # Edges
        if didx > -1:
            community = louvain.partition_at_level(hierarchy_dict["dendro"],
                                                   didx)
            edges = louvain.induced_graph(community, graph).edges(data=True)
        else:
            edges = graph.edges(data=True)
        idx = 0
        for source, target, weight in edges:
            # Include source == target for inner edge weight.
            #print(weight)
            if source != target:
                a_dict = {}
                a_dict["index"] = idx
                a_dict["name"] = "h%ie%i" % (hidx, idx)
                a_dict["source"] = "h%in%i" % (hidx, source)
                a_dict["target"] = "h%in%i" % (hidx, target)
                try:
                    count = weight["count"]
                except KeyError:
                    count = 1
                #print(count)
                a_dict["weight"] = weight["weight"] / count
                e_dict["h%ie%i" % (hidx, idx)] = a_dict
                idx += 1

        h_dict["nodes"] = n_dict
        h_dict["edges"] = e_dict
        g_dict["hierarchy%i" % (hidx)] = h_dict

    ds_dict["graph"] = g_dict
    ds_dict["dataset"] = dataset_name
    ds_dict["threshold"] = threshold

    #mzs = [x for x in np.round(h5_data.columns, 3)]
    mzs = [x for x in h5_data.columns]
    mzs_dict = {}
    for mz in mzs:
        mzs_dict[str(mz)] = {}
        for hy, vals in g_dict.items():
            for nid, props in vals["nodes"].items():
                try:
                    if mz in props["mzs"]:
                        mzs_dict[str(mz)][hy] = nid
                        break
                # Last hierarchy has no "mzs" prop
                except Exception as e:
                    print(e)
                    if mz == props["name"]:
                        mzs_dict[str(mz)][hy] = nid

    ds_dict["mzs"] = mzs_dict

    json["graphs"]["graph%i" % (hierarchy_dict["graph_idx"])] = ds_dict

    return json
Example no. 37
    #
    # # 		(b). 'coverage' - (note: have to turn partition into a list of sets.)
    # # ???? NOT WORKING AND NOT SURE WHY.
    # partsList = []
    # numParts = part.get( max(part,key=part.get) )
    # for p in range( numParts ):
    # 	partsList.append( set([i for i,j in part.items() if j == p]) )
    # coverage[i] = nx.community.coverage(G, partsList)
    #
    #
    #
    #
    # Looking further into dendrogram.  Makes sense.
    try:
        G2 = c.induced_graph(
            dend[0], G,
            weight='weight')  # define graph turns clusters into nodes.
        q_dend[1, i] = c.modularity(dend[1], G2, weight='weight')
        pp = c.partition_at_level(
            dend,
            1)  # express partition at a give layer in terms of all nodes.
        q_dend[2, i] = c.modularity(pp, G, weight='weight')
    except:
        continue

# Plot modularity metric for different partitions at different resolution parameters.
if False:
    plt.plot(res, q_bp, 'b')
    plt.plot(res, q_dend[0], 'r')
    plt.plot(res, q_dend[1], 'g')
    plt.plot(res, q_dend[2], 'k')
Example no. 38
plt.axis("off")
nx.draw_networkx(G_lesmis, pos=spring_pos, with_labels=False, node_size=15)

############### Community detection ###########
import community as com
parts = com.best_partition(G_lesmis)
values = [parts.get(node) for node in G_lesmis.nodes()]

plt.axis("off")
nx.draw_networkx(G_lesmis,
                 pos=spring_pos,
                 cmap=plt.get_cmap("jet"),
                 node_color=values,
                 font_size=20,
                 node_size=80,
                 with_labels=False)

## Calculate the modularity ##
com.modularity(parts, G_lesmis)

## induced graph : each community is represented as one node ##
help(com)
G_induced = com.induced_graph(parts, G_lesmis)
plt.axis("off")
nx.draw_networkx(G_induced,
                 cmap=plt.get_cmap("jet"),
                 font_size=20,
                 node_size=80,
                 with_labels=False)
Example no. 39
    def extraction(self):
        '''Extract adjacency lists,mats,user and community centrality and communities bags'''

        #Compute the first derivative and the point of timeslot separation
        firstderiv,mentionLimit=self.timeslotselection(self.authors,self.mentions,self.alltime)

        #Split time according to the first derivative of the users' activity#
        sesStart,timeslot,timeLimit=0,0,[self.alltime[0]]
        print("Forming timeslots")
        for k in range(len(mentionLimit)):
            if firstderiv[k]<0 and firstderiv[k+1]>=0:
                #make timeslot timelimit array
                timeLimit.append(self.alltime[int(mentionLimit[k])])
                fileNum='{0}'.format(str(timeslot).zfill(2))
                # print("Forming Timeslot Data "+str(timeslot)+" at point "+str(k))
                sesEnd=int(mentionLimit[k]+1)

                #Make pairs of users with weights
                usersPair=list(zip(self.authors[sesStart:sesEnd],self.mentions[sesStart:sesEnd]))

                #Create weighted adjacency list
                weighted=collections.Counter(usersPair)
                weighted=list(weighted.items())
                adjusrs,weights=zip(*weighted)
                adjauthors,adjments=zip(*adjusrs)
                adjList=list(zip(adjauthors,adjments,weights))

                #Write pairs of users to txt file for Gephi
                my_txt=open(self.dataset_path+"/data/GDD/results/forGephi/usersPairs_"+fileNum+".txt","w")
                my_txt.write("Source,Target,Weight"+"\n")
                for line in adjList:
                    my_txt.write(",".join(str(x) for x in line) + "\n")
                my_txt.close()

                #Create dictionary of tags per user
                tmptags=self.tags[sesStart:sesEnd]
                self.tagBag[timeslot]={}
                for authIdx,auth in enumerate(self.authors[sesStart:sesEnd]):
                    if auth not in self.tagBag[timeslot]:
                        self.tagBag[timeslot][auth]=[]
                    elif tmptags[authIdx]:
                        self.tagBag[timeslot][auth].append(tmptags[authIdx])

                #create dictionary of urls per user
                tmpUrls=self.tweetUrls[sesStart:sesEnd]
                self.urlBag[timeslot]={}
                for authIdx,auth in enumerate(self.authors[sesStart:sesEnd]):
                    if auth not in self.urlBag[timeslot]:
                        self.urlBag[timeslot][auth]=[]
                    elif tmpUrls[authIdx]:
                        self.urlBag[timeslot][auth].append(tmpUrls[authIdx])

                #create dictionary of tweet Ids per user
                tmptweetids=self.tweetIds[sesStart:sesEnd]
                self.tweetIdBag[timeslot]={}
                for authIdx,auth in enumerate(self.authors[sesStart:sesEnd]):
                    if auth not in self.tweetIdBag[timeslot]:
                        self.tweetIdBag[timeslot][auth]=[]
                    elif tmptweetids[authIdx]:
                        self.tweetIdBag[timeslot][auth].append(tmptweetids[authIdx])
                for mentIdx,ment in enumerate(self.mentions[sesStart:sesEnd]):
                    if ment not in self.tweetIdBag[timeslot]:
                        self.tweetIdBag[timeslot][ment]=[]
                    elif tmptweetids[mentIdx]:
                        self.tweetIdBag[timeslot][ment].append(tmptweetids[mentIdx])

                #create dictionary of text per user
                tmptweetText=self.twText[sesStart:sesEnd]
                self.tweetTextBag[timeslot]={}
                for authIdx,auth in enumerate(self.authors[sesStart:sesEnd]):
                    if auth not in self.tweetTextBag[timeslot]:
                        self.tweetTextBag[timeslot][auth]=[]
                    elif tmptweetText[authIdx]:
                        self.tweetTextBag[timeslot][auth].append(tmptweetText[authIdx])
                for mentIdx,ment in enumerate(self.mentions[sesStart:sesEnd]):
                    if ment not in self.tweetTextBag[timeslot]:
                        self.tweetTextBag[timeslot][ment]=[]
                    elif tmptweetText[mentIdx]:
                        self.tweetTextBag[timeslot][ment].append(tmptweetText[mentIdx])

                #Create dictionary of text


                #Construct networkX graph
                tempDiGraph=nx.DiGraph()
                tempDiGraph.add_weighted_edges_from(adjList)
                tempDiGraph.remove_edges_from(tempDiGraph.selfloop_edges())
                tempGraph=nx.Graph()
                tempGraph.add_weighted_edges_from(adjList)
                tempGraph.remove_edges_from(tempGraph.selfloop_edges())

                #Extract the centrality of each user using the PageRank algorithm
                tempUserPgRnk=nx.pagerank(tempDiGraph,alpha=0.85,max_iter=100,tol=0.001)
                maxPGR=max((pgr for k,(pgr) in tempUserPgRnk.items()))
                for k in tempUserPgRnk.items():
                    tempUserPgRnk[k[0]]/=maxPGR
                self.userPgRnkBag[timeslot]=tempUserPgRnk

                #Detect Communities using the louvain algorithm#
                partition = community.best_partition(tempGraph)
                inv_partition = {}
                for k, v in partition.items():
                    inv_partition[v] = inv_partition.get(v, [])
                    inv_partition[v].append(k)
                    inv_partition[v].sort()
                strComms=[inv_partition[x] for x in inv_partition]
                strComms.sort(key=len,reverse=True)

                #Construct Communities of uniqueUsers indices and new community dict with size sorted communities
                numComms,new_partition=[],{}
                for c1,comms in enumerate(strComms):
                    numpart=[]
                    for ids in comms:
                        numpart.extend(self.uniqueUsers[ids])
                        new_partition[ids]=c1
                    numComms.append(numpart)
                newinv_partition = {}
                for k, v in new_partition.items():
                    newinv_partition[v] = newinv_partition.get(v, [])
                    newinv_partition[v].append(k)
                    newinv_partition[v].sort()

                #Construct a graph using the communities as users
                tempCommGraph=community.induced_graph(new_partition,tempDiGraph)

                #Detect the centrality of each community using the PageRank algorithm
                commPgRnk=nx.pagerank(tempCommGraph,alpha=0.85,max_iter=100,tol=0.001)
                maxCPGR=max((cpgr for k,(cpgr) in commPgRnk.items()))
                commPgRnkList=[]
                for key,value in commPgRnk.items():
                    commPgRnkList.append(value/maxCPGR)
                self.commPgRnkBag[timeslot]=commPgRnkList

                '''Construct Community Dictionary'''
                self.commStrBag[timeslot]=strComms
                self.commNumBag[timeslot]=numComms
                sesStart=sesEnd
                timeslot+=1
        day_month=[datetime.datetime.fromtimestamp(int(x)).strftime('%d/%m') for x in timeLimit]
        self.day_month=day_month
        self.timeLimit=[time.ctime(int(x)) for x in timeLimit]
Example no. 40
edge_colors = 'red'
pos = nx.spring_layout(G)
nx.draw(G, pos, width=0.5, alpha=0.5)
plt.savefig("First_Graph.png")
nx.draw_networkx_edges(G, pos, width=0.5, alpha=0.5)

partition = community.best_partition(G)

print(len(partition))

partition_set = set()
for item in partition.values():
    partition_set.add(item)

induced_graph = community.induced_graph(partition, G)
print(induced_graph.edges(data='weight'))

pos = nx.spring_layout(induced_graph)
nx.draw(induced_graph, pos, width=0.5, alpha=0.5)
plt.savefig("Induced_Graph.png")
nx.draw_networkx_edges(induced_graph, pos, width=0.5, alpha=0.5)

cluster_size = []
for i in range(168):
    cluster_size.append(0)

for i in range(1, len(partition) + 1):
    cluster_size[partition[i]] += 1
    print(partition[i])
Example no. 41
def augment_graph_data(data, max_groups):

  total_nodes = len(data['nodes']) 

  #lowering the necessary node count
  #since in some cases node count is greatly reduced after processing
  # first author kurtz,m goes from ~60 to 19 for instance

  if total_nodes < 15:   
    #just get rid of the sets
    for i, l in enumerate(data["links"]):
      data["links"][i]["overlap"] = list(l["overlap"])

    return {"fullGraph" :data}

  #create the networkx graph
  G = nx.Graph()
  for i,x in enumerate(data['nodes']):
    G.add_node(i, node_name= x["nodeName"], nodeWeight = x["nodeWeight"], title=x["title"], citation_count=x["citation_count"], first_author = x["first_author"], read_count = x["read_count"])

  for i,x in enumerate(data['links']):
    G.add_edge(x["source"], x["target"], weight = x["value"], overlap = list(x["overlap"]))
   
  all_nodes = G.nodes()

  #partition is a dictionary with group names as keys
  # and individual node indexes as values
  partition = community.best_partition(G)

  for g in G.nodes():

    G.node[g]["group"] = partition[g]

  #with new group info, create the summary group graph
  summary_graph = community.induced_graph(partition, G)

  #title container
  titles = {}

  #enhance the information that will be in the json handed off to d3
  for x in summary_graph.nodes():
    summary_graph.node[x]["total_citations"] = sum([G.node[paper].get("citation_count", 0) for paper in G.nodes() if G.node[paper]["group"] == x])
    summary_graph.node[x]["total_reads"] = sum([G.node[paper].get("read_count", 0) for paper in G.nodes() if G.node[paper]["group"] == x])
    papers = sorted([G.node[paper] for paper in G.nodes() if G.node[paper]["group"] == x], key = lambda x: x.get("nodeWeight", 0), reverse = True)
    titles[x] = [p["title"]for p in papers]
    summary_graph.node[x]["paper_count"] = len(papers)

  #attaching title 'word clouds' to the nodes
  significant_words = tf_idf.get_tf_idf_vals(titles)
  #iterate over a copy of the node list, since nodes may be removed as we go
  for x in list(summary_graph.nodes()):
    #remove the groups with only 1 paper
    if summary_graph.nodes[x]["paper_count"] == 1:
      summary_graph.remove_node(x)
    else:
      #otherwise, label the group with at most its six highest-scoring title words
      summary_graph.nodes[x]["node_label"] = dict(sorted(significant_words[x].items(), key=lambda w: w[1], reverse=True)[:6])


  #remove all but the top n groups from the summary graph,
  #where top n is measured by total citations from a group
  top_nodes = sorted(summary_graph.nodes(data=True), key=lambda n: n[1]["total_citations"], reverse=True)[:max_groups]
  #drop groups with no citations at all
  top_nodes = [t for t in top_nodes if t[1]["total_citations"] >= 1]
  top_node_ids = [n[0] for n in top_nodes]
  for group_id in list(summary_graph.nodes()):
    if group_id not in top_node_ids:
      summary_graph.remove_node(group_id)

  #remove nodes from the full graph that aren't in a top group;
  #this automatically takes care of edges, too
  for node in list(G.nodes(data=True)):
    if node[1]["group"] not in top_node_ids:
      G.remove_node(node[0])

  #continuing to enhance the information: add to each group the most common co-references
  for x in summary_graph.nodes():
    #make it a float so the later division yields a fraction
    num_papers = float(summary_graph.nodes[x]["paper_count"])
    references = {}
    #find all members of group x
    indexes = [paperIndex for paperIndex in G.nodes() if G.nodes[paperIndex]["group"] == x]
    for edge in G.edges(data=True):
        #if both endpoints are in the group, it's a within-group connection
        # [0] is source, [1] is target, [2] is data dict
        paper_one = edge[0]
        paper_two = edge[1]
        if paper_one in indexes and paper_two in indexes:
            for bib in edge[2]["overlap"]:
                if bib in references:
                    references[bib].update([paper_one, paper_two])
                else:
                    references[bib] = set([paper_one, paper_two])

    count_references = sorted(references.items(), key=lambda r: len(r[1]), reverse=True)[:5]
    top_common_references = [(tup[0], float("{0:.2f}".format(len(tup[1]) / num_papers))) for tup in count_references]
    top_common_references = dict(top_common_references)
    summary_graph.nodes[x]["top_common_references"] = top_common_references

  summary_json = json_graph.node_link_data(summary_graph)

  # giving groups node_names based on size rank: the largest group gets node_name 1
  for i, n in enumerate(sorted(summary_json["nodes"], key=lambda x: x["paper_count"], reverse=True)):
    real_index = summary_json["nodes"].index(n)
    summary_json["nodes"][real_index]["node_name"] = i + 1


  for i, n in enumerate(summary_json["nodes"]):
    #cache the index so graph manipulation later is easier
    summary_json["nodes"][i]["stable_index"] = i

  final_data = {"summaryGraph" : summary_json, "fullGraph" : json_graph.node_link_data(G) }
  return final_data
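One property worth knowing when summarizing with `community.induced_graph`, shown in the minimal sketch below on a toy graph: the induced graph preserves the total edge weight of the original, with intra-community weight carried as self-loops on the community nodes.

# Sketch: induced_graph preserves total edge weight; intra-community
# weight shows up as self-loops on the community nodes.
import networkx as nx
import community

g = nx.karate_club_graph()
part = community.best_partition(g)
induced = community.induced_graph(part, g)
# unweighted edges count as weight 1 on both sides of the comparison
assert g.size(weight='weight') == induced.size(weight='weight')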
Esempio n. 42
0
plt.axis("off")
nx.draw_networkx(G_karate, pos=spring_pos, with_labels=False, node_size=80)

############### Community detection ###########
import community as com
parts = com.best_partition(G_karate)
values = [parts.get(node) for node in G_karate.nodes()]

plt.axis("off")
nx.draw_networkx(G_karate,
                 pos=spring_pos,
                 cmap=plt.get_cmap("jet"),
                 node_color=values,
                 font_size=20,
                 node_size=80,
                 with_labels=False)

## Calculate the modularity ##
print(com.modularity(parts, G_karate))

## induced graph : each community is represented as one node ##
help(com)
G_induced = com.induced_graph(parts, G_karate)
plt.axis("off")
nx.draw_networkx(G_induced,
                 cmap=plt.get_cmap("jet"),
                 font_size=20,
                 node_size=80,
                 with_labels=False)
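The induced-graph plot above draws every community node at the same size. A small variation, hedged as a sketch that reuses `parts` and `G_induced` from this example, scales each node by the size of the community it stands for:

# Sketch: size induced-graph nodes by community membership counts.
from collections import Counter
import networkx as nx
import matplotlib.pyplot as plt

sizes = Counter(parts.values())
plt.axis("off")
nx.draw_networkx(G_induced,
                 pos=nx.spring_layout(G_induced),
                 cmap=plt.get_cmap("jet"),
                 node_color=list(G_induced.nodes()),
                 node_size=[80 * sizes[n] for n in G_induced.nodes()],
                 with_labels=False)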
Esempio n. 43
0
plt.axis("off")
nx.draw_networkx(G_lesmis, pos=spring_pos, with_labels=False, node_size=15)

############### Community detection ###########
import community as com
parts = com.best_partition(G_lesmis)
values = [parts.get(node) for node in G_lesmis.nodes()]

plt.axis("off")
nx.draw_networkx(G_lesmis,
                 pos=spring_pos,
                 cmap=plt.get_cmap("jet"),
                 node_color=values,
                 font_size=20,
                 node_size=80,
                 with_labels=False)


## Calculate the modularity ##
print(com.modularity(parts, G_lesmis))

## induced graph : each community is represented as one node ##
help(com)
G_induced = com.induced_graph(parts, G_lesmis)
plt.axis("off")
nx.draw_networkx(G_induced,
                 cmap=plt.get_cmap("jet"),
                 font_size=20,
                 node_size=80,
                 with_labels=False)
Esempio n. 44
0
    def extraction(self):
        '''Extract adjacency lists,mats,user and community centrality and communities bags'''
        import community
        #Compute the first derivative and the point of timeslot separation
        firstderiv, mentionLimit = self.timeslotselection(self.authors, self.mentions, self.alltime)

        self.commPgRnkBag = {}

        #Split time according to the first derivative of the users' activity#
        sesStart, timeslot, timeLimit, commCount = 0, 0, [self.alltime[0]], 0
        print("Forming timeslots")
        for tmplim in mentionLimit:
            #make timeslot timelimit array
            timeLimit.append(self.alltime[int(tmplim)])
            fileNum = '{0}'.format(str(timeslot).zfill(2))
            # print("Forming Timeslot Data "+str(timeslot)+" at point "+str(tmplim))
            sesEnd = int(tmplim + 1)

            #Make pairs of users with weights
            usersPair = list(zip(self.authors[sesStart:sesEnd], self.mentions[sesStart:sesEnd]))
            #Create weighted adjacency list
            weighted = collections.Counter(usersPair)
            weighted = list(weighted.items())
            adjusrs, weights = zip(*weighted)
            adjauthors, adjments = zip(*adjusrs)
            adjList = list(zip(adjauthors, adjments, weights))

            '''Write pairs of users to txt file for Gephi'''
            with open(self.dataset_path + "/data/nonadaptive/results/forGephi/usersPairs_" + fileNum + ".txt", "w") as my_txt:
                my_txt.write("Source,Target,Weight" + "\n")
                for line in adjList:
                    my_txt.write(",".join(str(x) for x in line) + "\n")

            '''create dictionaries of text per user, of urls per user,
            of tweet Ids per user and of tags per user'''
            tmptweetText = self.twText[sesStart:sesEnd]
            self.tweetTextBag[timeslot] = {}
            tmpUrls = self.tweetUrls[sesStart:sesEnd]
            self.urlBag[timeslot] = {}
            tmptweetids = self.tweetIds[sesStart:sesEnd]
            self.tweetIdBag[timeslot] = {}
            tmptags = self.tags[sesStart:sesEnd]
            self.tagBag[timeslot] = {}
            for authIdx, auth in enumerate(self.authors[sesStart:sesEnd]):
                if auth not in self.tweetTextBag[timeslot]:
                    self.tweetTextBag[timeslot][auth] = []
                if tmptweetText[authIdx]:
                    self.tweetTextBag[timeslot][auth].append(tmptweetText[authIdx])
                if auth not in self.urlBag[timeslot]:
                    self.urlBag[timeslot][auth] = []
                if tmpUrls[authIdx]:
                    for multUrls in tmpUrls[authIdx]:
                        self.urlBag[timeslot][auth].append(multUrls)
                if auth not in self.tweetIdBag[timeslot]:
                    self.tweetIdBag[timeslot][auth] = []
                if tmptweetids[authIdx]:
                    self.tweetIdBag[timeslot][auth].append(tmptweetids[authIdx])
                if auth not in self.tagBag[timeslot]:
                    self.tagBag[timeslot][auth] = []
                if tmptags[authIdx]:
                    self.tagBag[timeslot][auth].append(tmptags[authIdx])
            for mentIdx, ment in enumerate(self.mentions[sesStart:sesEnd]):
                if ment not in self.tweetTextBag[timeslot]:
                    self.tweetTextBag[timeslot][ment] = []
                if tmptweetText[mentIdx]:
                    self.tweetTextBag[timeslot][ment].append(tmptweetText[mentIdx])
                if ment not in self.tweetIdBag[timeslot]:
                    self.tweetIdBag[timeslot][ment] = []
                if tmptweetids[mentIdx]:
                    self.tweetIdBag[timeslot][ment].append(tmptweetids[mentIdx])

            '''Construct networkX graph'''
            tempDiGraph = nx.DiGraph()
            tempDiGraph.add_weighted_edges_from(adjList)
            tempDiGraph.remove_edges_from(list(nx.selfloop_edges(tempDiGraph)))
            tempGraph = nx.Graph()
            tempGraph.add_weighted_edges_from(adjList)
            tempGraph.remove_edges_from(list(nx.selfloop_edges(tempGraph)))

            '''Extract the centrality of each user using the PageRank algorithm'''
            tempUserPgRnk = nx.pagerank(tempDiGraph, alpha=0.85, max_iter=100, tol=0.001)
            maxPGR = max(tempUserPgRnk.values())
            for k in tempUserPgRnk:
                tempUserPgRnk[k] /= maxPGR
            self.userPgRnkBag[timeslot] = tempUserPgRnk

            '''Detect Communities using the louvain algorithm'''
            partition = community.best_partition(tempGraph)
            inv_partition = {}
            for k, v in partition.items():
                inv_partition[v] = inv_partition.get(v, [])
                inv_partition[v].append(k)
                inv_partition[v].sort()
            strComms = [inv_partition[x] for x in inv_partition]
            strComms.sort(key=len, reverse=True)
            commCount += len(strComms)

            '''Construct Communities of uniqueUsers indices and new community dict with size sorted communities'''
            numComms, new_partition = [], {}
            for c1, comms in enumerate(strComms):
                numpart = []
                for ids in comms:
                    numpart.extend(self.uniqueUsers[ids])
                    new_partition[ids] = c1
                numpart.sort()
                numComms.append(numpart)
            newinv_partition = {}
            for k, v in new_partition.items():
                newinv_partition[v] = newinv_partition.get(v, [])
                newinv_partition[v].append(k)
                newinv_partition[v].sort()

            '''Construct a graph using the communities as users'''
            tempCommGraph = community.induced_graph(new_partition, tempDiGraph)
            self.commGraph = tempCommGraph

            '''Detect the centrality of each community using the PageRank algorithm'''
            commPgRnk = nx.pagerank(tempCommGraph, alpha=0.85, max_iter=100, tol=0.001)
            maxCPGR = max(commPgRnk.values())
            commPgRnkList = []
            for key, value in commPgRnk.items():
                commPgRnkList.append(value/maxCPGR)
            self.commPgRnkBag[timeslot] = commPgRnkList

            # #Detect the centrality of each community using the degree centrality algorithm
            # commDegreeness = nx.degree_centrality(tempCommGraph)
            # maxCDeg = max((cpgr for k, (cpgr) in commDegreeness.items()))
            # commDegreenessList = []
            # for key, value in commDegreeness.items():
            #     commDegreenessList.append(value/maxCDeg)
            # self.commDegreenessBag[timeslot] = commDegreenessList

            # #Detect the centrality of each community using the betweeness centrality algorithm
            # commBetweeness = nx.betweenness_centrality(tempCommGraph)
            # maxCBet = max((cpgr for k, (cpgr) in commBetweeness.items()))
            # commBetweennessList = []
            # for key, value in commBetweeness.items():
            #     commBetweennessList.append(value/maxCBet)
            # self.commBetweenessBag[timeslot] = commBetweennessList

            # #Extract community degree
            # degreelist=[]
            # for k in range(len(tempCommGraph.edge)):
            #     tmpdeg=tempCommGraph.degree(k)
            #     degreelist.append(tmpdeg)
            # degreelist=[x/max(degreelist) for x in degreelist]
            # self.degreeBag[timeslot]=degreelist

            '''Construct Community Dictionary'''
            self.commStrBag[timeslot] = strComms
            self.commNumBag[timeslot] = numComms
            sesStart = sesEnd
            timeslot += 1

        day_month = [datetime.datetime.fromtimestamp(int(x)).strftime(self.labelstr) for x in timeLimit]
        self.day_month = day_month
        self.timeLimit = [datetime.datetime.fromtimestamp(int(x)).strftime(self.labelstr) for x in timeLimit]
        statement = '\nTotal # of communities is '+str(commCount) + '\n'
        statsfile = open(self.dataset_path + "/data/nonadaptive/results/basicstats.txt",'a')
        print(statement)
        statsfile.write(statement)
        statsfile.close()

        with open(self.dataset_path + '/data/nonadaptive/tmp/dataComm_' + str(self.fileTitle) + '.pck', 'wb') as dataCommPck:
            pickle.dump(self, dataCommPck, protocol=2)
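The method above inverts a node-to-community dict by hand twice (`inv_partition` and `newinv_partition`). A minimal equivalent sketch with `collections.defaultdict` does the same inversion more directly:

# Sketch: invert a node -> community id dict into community id -> sorted members.
from collections import defaultdict

def invert_partition(partition):
    inverted = defaultdict(list)
    for node, comm_id in partition.items():
        inverted[comm_id].append(node)
    return {cid: sorted(members) for cid, members in inverted.items()}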
Esempio n. 45
0
def run_comm(inputcID, subids):
    con = mdb.connect(user='******', passwd='X', db='X', unix_socket='X', charset='utf8')
    with con:
        cur = con.cursor()
        cur.execute("SELECT cID,tocID FROM CommenterSubs WHERE inputID=%s AND tocID<>'NotShared'", (inputcID,))
        edges = cur.fetchall()
        cur.close()
    edges_pulled = [list(x) for x in set(tuple(x) for x in edges)]
    print("Number of edges pulled from the database", len(edges_pulled))
    uedges = []
    for edge_i in edges_pulled:
        if edge_i[0] in subids:
            uedges.append(edge_i)
    print("Number of relevant edges for the inputcID", len(uedges))
    G = nx.Graph()
    for edge in uedges:
        G.add_edge(edge[0], edge[1], weight=0.5)
    print("Number of edges", G.number_of_edges(), ", number of nodes", G.number_of_nodes())
    
    import community as comm
    #dendo = comm.generate_dendrogram(G)  #takes a long time and is unnecessary here
    part = comm.best_partition(G)
    modularity = comm.modularity(part, G)
    print("Number of communities found", max(part.values()) + 1, ", modularity:", modularity)
    count = 0.
    commf = 0    #community to which the input channel belongs
    nodesf = []  #nodes of the community to which the input channel belongs
    nodepcom = []
    label_prep = ["" for x in range(max(part.values()) + 1)]  #one slot per community id
    for com in set(part.values()):
        count = count + 1.
        list_nodes = [node for node in part.keys() if part[node] == com]
        nodepcom.append(len(list_nodes))
        label_prep[int(com)] = str(len(list_nodes))
        if inputcID in list_nodes:
            print("Input Channel is in the community #", com)
            commf = com
            nodesf = list_nodes
            label_prep[int(com)] = "Target community: " + str(len(list_nodes))
    labs = {cid: label_prep[int(cid)] for cid in set(part.values())}
    plt.figure()
    h1 = plt.hist(nodepcom, bins=20, density=False, color='steelblue')
    h2 = plt.axvline(int(np.average(nodepcom)), 0, 1, color='navy', linewidth=10,
                     label='average community size = ' + str(int(np.average(nodepcom))))
    plt.legend(fontsize=22)
    plt.title('Network modularity Q = %.4f ' % (modularity), fontsize=30)
    plt.xlabel('Number of channels per community', fontsize=26)
    plt.ylabel('Frequency', fontsize=26)
    plt.savefig('./static/img/' + inputcID + '/' + inputcID + '_com_size_distrib_1.png', dpi=300, format='png')
    com1 = comm.induced_graph(part, G)
    # plt.figure()
    # pos = nx.spring_layout(com1)
    # nx.draw_networkx_edges(com1, pos, width=1.0, edge_color='g', style='solid', alpha=0.2)
    # nx.draw_networkx_labels(com1, pos, labels=labs, font_size=12, font_color='r', font_family='sans-serif', font_weight='normal', alpha=1.0)
    # plt.draw()
    # plt.savefig('./static/img/'+inputcID+'/'+inputcID + '_v3_com_plot_all_1.png', dpi=300, format='png', transparent=True)
    #export to json for the d3 graph plot
    comfin = com1
    for node in comfin.nodes():
        #induced-graph nodes are the community ids themselves
        comfin.nodes[node]['group'] = int(node)
        comfin.nodes[node]['label'] = label_prep[int(node)]
    nld = json_graph.node_link_data(comfin)
    with open('./static/img/' + inputcID + '/' + inputcID + '_community_graph_comsub2.json', 'w') as out:
        json.dump(nld, out)
    return nodesf
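The node-link JSON written at the end of `run_comm` can be read back into a NetworkX graph with the matching `json_graph` helper. A minimal round-trip sketch (the file name here is hypothetical):

# Sketch: reload the exported node-link JSON into a graph for later reuse.
import json
from networkx.readwrite import json_graph

with open('community_graph_comsub2.json') as fh:  # hypothetical path
    restored = json_graph.node_link_graph(json.load(fh))
print(restored.number_of_nodes(), restored.number_of_edges())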