def find_common_clique(Gd, curr, wk, conceptnet_words, Tdict, Tnode_dict):
    """Build a ConceptNet graph over the largest clique shared by two words.

    Args:
        Gd: mapping from a key to a graph; ``Gd[curr]`` is converted to an
            undirected ``nx.Graph``.
        curr: key selecting the graph in ``Gd`` and the entry in ``Tdict``.
        wk: pair of words; it is normalised to ``(min(wk), max(wk))``.
        conceptnet_words: passed through to ``add_conceptnet_edges``.
        Tdict: ``Tdict[curr]`` holds the word pairs that were already handled.
        Tnode_dict: registry of node tuples already used; a new clique is
            recorded in it with the value ``'C'`` (mutated in place).

    Returns:
        ``(curg, wk, cnodes)`` where ``curg`` is the new ``nx.DiGraph``,
        ``wk`` the normalised pair and ``cnodes`` the sorted clique members,
        or ``([], [], [])`` when the pair was already handled, the clique
        was already registered, or no common clique of size >= 3 exists.
    """
    wk = (min(wk), max(wk))
    w1, w2 = wk

    # Cheap duplicate check first: clique enumeration below is expensive and
    # its result is thrown away when the pair is already known.
    if wk in Tdict[curr]:
        print(" graph already exist: %s, %s" % (curr, repr(wk)))
        return [], [], []

    gr = nx.Graph(Gd[curr])  # convert to undirected graph
    cli1 = sorted(nx.cliques_containing_node(gr, w1), key=len, reverse=True)
    cli2 = sorted(nx.cliques_containing_node(gr, w2), key=len, reverse=True)

    # find the largest common clique, size >= 3
    cnodes = []
    for first in cli1:
        if len(first) < 3:
            # cli1 is sorted by decreasing size, so nothing larger follows.
            break
        first_sorted = sorted(first)
        for second in cli2:
            if len(second) == len(first) and sorted(second) == first_sorted:
                cnodes = sorted(second)
                break
        if cnodes:
            break

    if not cnodes:
        print(" no good cliques found: %s, %s" % (repr(wk), curr))
        return [], [], []

    all_nodes = tuple(sorted(cnodes))
    if all_nodes in Tnode_dict:
        print(" node dup exists: %s, %s" % (repr(wk), curr))
        return [], [], []  # already exist
    Tnode_dict[all_nodes] = 'C'

    curg = nx.DiGraph()
    for un in cnodes:
        # Self pair first, then every ordered pair with a different node.
        add_conceptnet_edges(curg, conceptnet_words, un, un)
        for vn in cnodes:
            if not vn == un:
                add_conceptnet_edges(curg, conceptnet_words, un, vn)
    return curg, wk, cnodes
def find_common_clique(Gd, curr, wk, conceptnet_words, Tdict, Tnode_dict):
    """Build a ConceptNet graph over the largest clique shared by two words.

    Args:
        Gd: mapping from a key to a graph; ``Gd[curr]`` is converted to an
            undirected ``nx.Graph``.
        curr: key selecting the graph in ``Gd`` and the entry in ``Tdict``.
        wk: pair of words; it is normalised to ``(min(wk), max(wk))``.
        conceptnet_words: passed through to ``add_conceptnet_edges``.
        Tdict: ``Tdict[curr]`` holds the word pairs that were already handled.
        Tnode_dict: registry of node tuples already used; a new clique is
            recorded in it with the value ``'C'`` (mutated in place).

    Returns:
        ``(curg, wk, cnodes)`` on success, otherwise ``([], [], [])`` (pair
        already handled, clique already registered, or no common clique of
        size >= 3).
    """
    wk = (min(wk), max(wk))
    w1, w2 = wk

    # Check for an already-built graph before enumerating cliques, which is
    # the costly step and is wasted work when we return early.
    if wk in Tdict[curr]:
        print(" graph already exist: %s, %s" % (curr, repr(wk)))
        return [], [], []

    gr = nx.Graph(Gd[curr])  # convert to undirected graph
    cli1 = sorted(nx.cliques_containing_node(gr, w1), key=len, reverse=True)
    cli2 = sorted(nx.cliques_containing_node(gr, w2), key=len, reverse=True)

    # find the largest common clique, size >= 3
    cnodes = []
    for first in cli1:
        if len(first) < 3:
            # Sorted by decreasing size: every remaining clique is too small.
            break
        first_sorted = sorted(first)
        for second in cli2:
            if len(second) == len(first) and sorted(second) == first_sorted:
                cnodes = sorted(second)
                break
        if cnodes:
            break

    if not cnodes:
        print(" no good cliques found: %s, %s" % (repr(wk), curr))
        return [], [], []

    all_nodes = tuple(sorted(cnodes))
    if all_nodes in Tnode_dict:
        print(" node dup exists: %s, %s" % (repr(wk), curr))
        return [], [], []  # already exist
    Tnode_dict[all_nodes] = 'C'

    curg = nx.DiGraph()
    for un in cnodes:
        add_conceptnet_edges(curg, conceptnet_words, un, un)
        for vn in cnodes:
            if not vn == un:
                add_conceptnet_edges(curg, conceptnet_words, un, vn)
    return curg, wk, cnodes
def cliqueCounts(self, cdata, k):
    """Count, per entry of ``cdata``, the cliques of size >= ``k`` around its node.

    Each entry is indexed as ``entry[0]`` (the node looked up in
    ``self.graph``) and ``entry[1]`` (the value carried into the result).
    Returns a list of ``(entry[1], count)`` tuples in input order.
    """
    node_ids = [entry[0] for entry in cdata]
    cliques_by_node = nx.cliques_containing_node(self.graph, node_ids)
    counted = []
    for entry in cdata:
        large = sum(1 for clique in cliques_by_node[entry[0]] if len(clique) >= k)
        counted.append((entry[1], large))
    return counted
def get_node_cliques_metrics(self, node) -> dict:
    """Summarise the sizes of the cliques of ``self.undirected_graph`` containing ``node``.

    :param node: node passed to ``nx.cliques_containing_node`` as ``nodes``
    :return: dict with the mean, standard deviation and maximum clique size
        and the number of cliques; every value is 0 when no clique is found
    """
    cliques = nx.cliques_containing_node(self.undirected_graph, nodes=node)
    if not cliques:
        return {
            'node_cliques_size_avg': 0,
            'node_cliques_size_std': 0,
            'node_cliques_size_max': 0,
            'node_number_of_cliques': 0,
        }
    sizes = list(map(len, cliques))
    return {
        'node_cliques_size_avg': np.mean(sizes),
        'node_cliques_size_std': np.std(sizes),
        'node_cliques_size_max': max(sizes),
        'node_number_of_cliques': len(cliques),
    }
def main():
    """Load the fox-video graph, print Jaccard data and the cliques around one video.

    Reads ``fox titles.txt``, ``fox vertex.txt`` and ``foxsay.graphml`` from
    the working directory, drops isolated nodes, runs ``jaccard`` on the
    graph, prints one Jaccard index and then every clique containing the
    node ``jofNR_WkoCE`` (size followed by the title of each member).

    Returns:
        0, always.
    """
    # Context managers close both input files; they were left open before.
    with open("fox titles.txt") as titles_file:
        titles = fileToList(titles_file)
    with open("fox vertex.txt") as names_file:
        names = fileToList(names_file)
    d1, d2 = todic(titles, names)

    ylvis = "jofNR_WkoCE"
    g = nx.read_graphml("foxsay.graphml")

    # Remove nodes without any edge.
    # NOTE(review): iterating ``g.degree()`` and indexing it by node assumes
    # it behaves like a dict keyed by node — confirm for the networkx
    # version in use.
    outdeg = g.degree()
    rm = [n for n in outdeg if outdeg[n] == 0]
    g.remove_nodes_from(rm)

    jaccard(g, d1, d2)
    print(compute_jaccard_index(finn_nabo(g, "xO_a7OKmh7Q"),
                                finn_nabo(g, "xiKiAlv9wWw")))

    q = nx.cliques_containing_node(g, ylvis)
    print("cliques containing the fox: ")
    d2["jofNR_WkoCE"] = "ylvis original fox -tvn"
    for clique in q:
        print(len(clique))
        for node in clique:
            # Title lookup uses at most the first 40 characters of the id.
            print(d2[str(node)[:40]])
        print("\n")
    return 0
def cliques(data, subreddit):
    """Print every clique around ``subreddit`` in a distance-threshold graph.

    ``data`` unpacks to ``(subreddits, authors, M)``. Two rows of ``M`` are
    linked when their pairwise distance is below the mean of all pairwise
    distances; graph nodes are row indices, so each printed clique is the
    slice of ``subreddits`` at the clique's indices.
    """
    subreddits, _authors, M = data
    distances = cdist(M, M)
    below_mean = distances < np.mean(distances)
    graph = nx.convert_matrix.from_numpy_matrix(below_mean)
    target_index = list(subreddits).index(subreddit)
    for members in nx.cliques_containing_node(graph, target_index):
        print(np.array(subreddits)[members])
def tree_decomposition(input_graph):
    """Build a decomposition tree of ``input_graph`` by eliminating minimum-degree vertices.

    Works on a copy of the input with self-loop edges removed. Each round
    takes the first vertex returned by ``sort_by_degree``, creates a tree
    node made of that vertex and its neighbours (a tuple), and removes one
    or more vertices from the working graph. Tree nodes are linked through
    ``tree_connectivity_dictionary``, which maps each remaining graph
    vertex to the tree nodes still waiting to be connected through it.

    Returns:
        nx.Graph whose nodes are tuples of graph vertices. A warning is
        printed when ``nodes - edges`` differs from 1.

    NOTE(review): the indentation below was reconstructed from a
    whitespace-collapsed source — confirm the nesting against the original.
    NOTE(review): ``neighbors()`` is used as a list (``len``, repeated
    iteration); presumably this targets networkx 1.x — confirm.
    """
    current_graph=input_graph.copy()
    current_graph.remove_edges_from(current_graph.selfloop_edges())
    # The next two locals are never read again in this function.
    decomposition_tree_vertices=list()
    counter=0;
    decomposition_tree=nx.Graph()
    tree_connectivity_dictionary=dict()
    for graph_vertex in current_graph.nodes():
        tree_connectivity_dictionary[graph_vertex]=[]
    while current_graph.order()>0:
        nodes_sorted_by_degree=sort_by_degree(current_graph)
        # presumably the lowest-degree vertex comes first; verify sort_by_degree
        minimum_degree_vertex=nodes_sorted_by_degree[0][0]
        cliques_of_minimum_degree_vertex=nx.cliques_containing_node(current_graph,minimum_degree_vertex)
        number_of_cliques_containing_vertex=len(cliques_of_minimum_degree_vertex)
        minimum_degree_vertex_neighbors=current_graph.neighbors(minimum_degree_vertex)
        # New tree node = the chosen vertex followed by its neighbours.
        new_tree_vertex=[minimum_degree_vertex]
        new_tree_vertex.extend(minimum_degree_vertex_neighbors)
        new_tree_vertex=tuple(new_tree_vertex)
        decomposition_tree.add_node(new_tree_vertex)
        if number_of_cliques_containing_vertex>1:
            # The vertex lies in several maximal cliques: connect all its
            # neighbours pairwise, then remove only the vertex itself.
            pairs_of_neighbors=make_pairs(minimum_degree_vertex_neighbors)
            for additional_edge in pairs_of_neighbors:current_graph.add_edge(additional_edge[0],additional_edge[1])
            toberemoved=[minimum_degree_vertex]
        else:
            # Also remove every neighbour whose degree equals the number of
            # neighbours of the chosen vertex.
            toberemoved=[minimum_degree_vertex]
            number_of_clique_edges_per_vertex=len(minimum_degree_vertex_neighbors)
            for temp_vertex in minimum_degree_vertex_neighbors:
                if current_graph.degree(temp_vertex)==number_of_clique_edges_per_vertex:
                    toberemoved.append(temp_vertex)
        for graph_vertex in new_tree_vertex:
            if graph_vertex in toberemoved:
                current_graph.remove_node(graph_vertex)
                # Connect the new tree node to every tree node that was
                # waiting on this graph vertex.
                tree_vertices_waiting=tree_connectivity_dictionary[graph_vertex]
                for tree_vertex_waiting in tree_vertices_waiting:
                    decomposition_tree.add_edge(new_tree_vertex,tree_vertex_waiting)
                # Iterate over a copy: the removals below can touch the very
                # list stored under graph_vertex.
                temp_copy_tree_vertices_waiting=tree_vertices_waiting[:]
                for tree_vertex_waiting in temp_copy_tree_vertices_waiting:
                    common_graph_nodes_between_tree_vertices=my_very_simple_tuple_intersection(new_tree_vertex,tree_vertex_waiting)
                    for candidate in common_graph_nodes_between_tree_vertices:
                        if tree_vertex_waiting in tree_connectivity_dictionary[candidate]:tree_connectivity_dictionary[candidate].remove(tree_vertex_waiting)
                del tree_connectivity_dictionary[graph_vertex]
            else:
                # Vertex survives: the new tree node now waits on it.
                tree_connectivity_dictionary[graph_vertex].append(new_tree_vertex)
    # Sanity check on the node/edge counts of the result.
    if ((decomposition_tree.number_of_nodes()-decomposition_tree.number_of_edges()) < 1):
        print 'WARNING WARNING WARNING: THE OUTPUT GRAPH IS ****NOT**** A TREE, IT INCLUDES CYCLES'
    elif ((decomposition_tree.number_of_nodes()-decomposition_tree.number_of_edges()) > 1):
        print 'WARNING WARNING WARNING: THE OUTPUT GRAPH IS ****NOT**** A TREE, IT IS DISCONNECTED'
    return decomposition_tree
def _annotate_cliques(self, graph, min_clique_size=3):
    """Store on each node the maximal cliques of at least ``min_clique_size`` members.

    The maximal cliques are enumerated once and reused for every node.
    Nodes with no clique of the required size are left untouched; the
    others get the list under the ``'cliques'`` node attribute.
    """
    maximal_cliques = list(nx.find_cliques(graph))
    for vertex in graph.nodes():
        containing = nx.cliques_containing_node(
            graph, nodes=vertex, cliques=maximal_cliques)
        large_enough = []
        for clique in containing:
            if len(clique) >= min_clique_size:
                large_enough.append(clique)
        if large_enough:
            graph.node[vertex]['cliques'] = large_enough
def Cliques(G):
    """Return, for every node of ``G``, the list of maximal cliques it belongs to.

    The result is a list with one entry per node, in ``G.nodes()`` order;
    each entry is the list of cliques containing that node.

    The maximal cliques are enumerated once and handed to
    ``nx.cliques_containing_node`` through its ``cliques`` argument, instead
    of being recomputed for each node. As a consequence the same clique
    list object can appear in the entries of several nodes.
    """
    maximal_cliques = list(nx.find_cliques(G))
    return [
        nx.cliques_containing_node(G, node, cliques=maximal_cliques)
        for node in G.nodes()
    ]
def _get_cliques_dict(node):
    """Return the cliques around ``node`` before and after eliminating it.

    A working copy is built from the edges of ``graph_copy`` (taken from the
    enclosing scope) and the neighbours of ``node`` are connected pairwise.

    Returns a pair of ``{node: cliques}`` dictionaries:
      * the first covers ``node`` and its neighbours while ``node`` is
        still in the working copy;
      * the second covers the neighbours only, after ``node`` was removed.
    """
    working_graph = nx.Graph(graph_copy.edges())
    adjacent = working_graph.neighbors(node)
    fill_in_edges = itertools.combinations(adjacent, 2)
    working_graph.add_edges_from(fill_in_edges)

    with_node = nx.cliques_containing_node(
        working_graph, nodes=([node] + adjacent))

    working_graph.remove_node(node)
    without_node = nx.cliques_containing_node(working_graph, nodes=adjacent)
    return with_node, without_node
def findPerfectGroup(Event, User):
    """Create a group from the first clique of exactly ``Event.groupSize`` users.

    ``User`` is wrapped in ``UserHandler`` and the cliques of ``Event.G()``
    containing ``User.id`` are scanned in order (each one is printed). For
    the first clique of the requested size, every member is removed from
    the event graph and the result of ``GroupHandler.createGroup`` is
    returned. Returns ``None`` when no clique has the requested size.
    """
    User = UserHandler(User)
    PerfectGroups = nx.cliques_containing_node(Event.G(), User.id)
    for i in PerfectGroups:
        print(i)
        # Compare by value: ``is`` on integers only works by accident for
        # small cached ints.
        if len(i) == Event.groupSize:
            for j in i:
                Event._removeUserFromGraph(j)
            return GroupHandler.createGroup(i, Event)
    return None
def create_edge(G, K, node_1, node_2):
    """Add the edge ``node_1``-``node_2`` unless it creates a clique larger than ``K``.

    Nothing happens for a self pair. Otherwise the edge is added and, if
    any clique containing ``node_1`` also contains ``node_2`` and has more
    than ``K`` members, the edge is removed again. ``G`` is modified in
    place and returned.
    """
    if node_1 == node_2:
        return G

    G.add_edge(node_1, node_2)
    oversized = any(
        node_2 in clique and len(clique) > K
        for clique in nx.cliques_containing_node(G, nodes=node_1)
    )
    if oversized:
        G.remove_edge(node_1, node_2)
    return G
def all_cliques(self, spectrum):
    """Return every clique of ``self.G`` around the peaks of ``spectrum``.

    For each peak of ``self[spectrum]``, the cliques containing that peak
    are converted with ``self._clique`` and collected in one flat list.
    """
    collected = []
    for peak in self[spectrum].peaks:
        around_peak = nx.cliques_containing_node(self.G, peak)
        collected += [self._clique(members) for members in around_peak]
    return collected
def _annotate_cliques(self, graph, min_clique_size=3):
    """Attach to each node its maximal cliques having >= ``min_clique_size`` members.

    All maximal cliques are computed once up front. A node only receives
    the ``'cliques'`` attribute when at least one qualifying clique exists.
    """
    every_clique = list(nx.find_cliques(graph))

    def is_big(clique):
        return len(clique) >= min_clique_size

    for node in graph.nodes():
        node_cliques = nx.cliques_containing_node(graph,
                                                  nodes=node,
                                                  cliques=every_clique)
        kept = [clique for clique in node_cliques if is_big(clique)]
        if not kept:
            continue
        graph.node[node]['cliques'] = kept
def generate_subgraph(self):
    """Pick a random vertex and mark the first large-enough clique around it.

    A vertex index is drawn uniformly from ``0 .. self.num_vertices - 1``.
    The first clique containing it with at least ``self.size_subgraph``
    members gets one random value in ``[0, self.max_value]`` written to
    ``self.values`` for each member, is stored in ``self.subgraph`` and is
    printed. Nothing changes when no clique is large enough.
    """
    start = random.randint(0, self.num_vertices - 1)
    candidates = networkx.cliques_containing_node(self.G, [start])[start]
    chosen = next(
        (clique for clique in candidates if len(clique) >= self.size_subgraph),
        None,
    )
    if chosen is None:
        return

    shared_value = random.uniform(0, self.max_value)
    for member in chosen:
        self.values[member] = shared_value
    self.subgraph = chosen
    print(chosen)
def clique_generator(self, spectrum):
    """Yield the cliques around the peaks of ``spectrum``, peaks in random order.

    The peaks of ``self[spectrum]`` are visited following a NumPy random
    permutation; for each one, every clique of ``self.G`` containing it is
    converted with ``self._clique`` and yielded.
    """
    peaks = self[spectrum].peaks
    for position in np.random.permutation(len(peaks)):
        for members in nx.cliques_containing_node(self.G, peaks[position]):
            yield self._clique(members)
def init(G):
    """Initialise the key variables of the RMC algorithm (Algorithm 2).

    Computes lower and upper bounds on the clique number of ``G`` and
    checks whether the maximum core is already a clique.

    :param G: networkx graph
    :return: tuple ``(r, wc_min, wc_max, max_core)`` where ``r`` is 1 when
        the bounds already meet (or the max core is a clique) and 0
        otherwise, ``wc_min`` / ``wc_max`` are the bounds and ``max_core``
        is the k-core for the largest core number
    """
    wc_min = 0
    core_number_dict = nx.core_number(G)
    # -100 is kept as the value for a graph without nodes.
    max_core_number = max(core_number_dict.values(), default=-100)
    print(max_core_number)
    wc_max = max_core_number + 1
    max_core = nx.k_core(G, max_core_number)

    # check if max-core is a clique: n nodes need n*(n-1)/2 edges
    num_nodes = len(max_core.nodes())
    num_edges = max_core.number_of_edges()
    if num_edges == (num_nodes * (num_nodes - 1)) // 2:
        print("max_core is a clique, hence must be maximum clique")
        print("returning maximum clique")
        wc_min = num_nodes
        wc_max = num_nodes + 1
        return (1, wc_min, wc_max, max_core)

    # NOTE(review): every core number is <= max_core_number < wc_max here,
    # so this condition cannot hold as written — confirm the intended
    # comparison against the paper.
    for k, v in core_number_dict.items():
        if v > wc_max:
            # find the largest maximal clique containing that node; cliques
            # are plain lists of nodes, so their size is len(), not
            # number_of_nodes()
            cliques = nx.cliques_containing_node(G, k)
            node_maximal_clique = max(cliques, key=len)
            if len(node_maximal_clique) > wc_min:
                wc_min = len(node_maximal_clique)

    if wc_min == wc_max:
        return 1, wc_min, wc_max, max_core

    # color number of graph through graph coloring - line 12
    d = nx.coloring.greedy_color(G, strategy='largest_first')
    cn = len(set(d.values()))
    if wc_max > cn:
        wc_max = cn
    if wc_max == wc_min:
        return 1, wc_min, wc_max, max_core
    return 0, wc_min, wc_max, max_core
def find_clique(uid, p_ave_degree=50):
    """Rank the users that share cliques with ``uid`` in its follow graph.

    A cached result from ``get_buffer_clique`` is returned when available.
    Otherwise the users followed by ``uid`` (rows with ``flag < 1``) are
    loaded from the ``weibo_ranker`` database, edges between those users
    are added, the graph is reduced with ``decomplex`` and the cliques
    containing ``uid`` with more than two members are scanned.

    Args:
        uid: user id the graph is centred on.
        p_ave_degree: unused by this function; kept for callers.

    Returns:
        The cached value, or a list of ``(user, count)`` pairs sorted by
        decreasing count, where ``count`` is the number of such cliques the
        user shares with ``uid``. An empty list when the graph has no node.
        The computed result is stored with ``save_clique``.
    """
    buffered = get_buffer_clique(uid)
    if buffered != False:
        return buffered

    # Parameterised query: the driver substitutes the id, no string
    # formatting of SQL.
    sql = ("select uid from (select `to` from vip_relation where `from`=%s) as vip "
           "left join people_rank4 on vip.`to`=people_rank4.uid where flag < 1")

    G = nx.DiGraph()
    con = mdb.connect('localhost', 'root', 'root', 'weibo_ranker')
    try:
        cur = con.cursor()
        cur.execute(sql, (uid,))
        user_list = [uid]
        for user in cur.fetchall():
            G.add_edge(uid, user[0])
            user_list.append(user[0])

        # Set for O(1) membership tests while scanning each user's follows.
        known_users = set(user_list)
        for user in user_list:
            cur.execute(sql, (user,))
            for result in cur.fetchall():
                if result[0] in known_users:
                    G.add_edge(user, result[0])
    finally:
        # The connection used to stay open on every path.
        con.close()

    if len(G.nodes()) == 0:
        save_clique(uid, [])
        return []

    # decomplex by deleting nodes
    decomplex(G, uid, 200)
    dG = G.to_undirected(True)
    sub_graph_node_list = dG.neighbors(uid)
    sub_graph_node_list.append(uid)
    dG = dG.subgraph(sub_graph_node_list)

    cliques = nx.cliques_containing_node(dG, uid)
    clique_user_dic = {}
    for clique in cliques:
        if len(clique) > 2:
            for user in clique:
                if user == uid:
                    continue
                if isset(clique_user_dic, user):
                    clique_user_dic[user] += 1
                else:
                    clique_user_dic[user] = 1

    clique_user_arr = sorted(clique_user_dic.items(), key=itemgetter(1), reverse=True)
    save_clique(uid, clique_user_arr)
    return clique_user_arr
def test_cliques_containing_node(self):
    """Check ``cliques_containing_node`` for a single node, node lists and all nodes."""
    G = self.G
    big = [2, 6, 1, 3]
    small = [2, 6, 4]
    # Single node -> list of cliques.
    assert_equal(nx.cliques_containing_node(G, 1), [big])
    # List of nodes -> dict keyed by node.
    single = nx.cliques_containing_node(G, [1])
    assert_equal(list(single.values()), [[big]])
    pair = nx.cliques_containing_node(G, [1, 2])
    assert_equal(list(pair.values()), [[big], [big, small]])
    assert_equal(nx.cliques_containing_node(G, [1, 2]),
                 {1: [big], 2: [big, small]})
    assert_equal(nx.cliques_containing_node(G, 1), [big])
    assert_equal(nx.cliques_containing_node(G, 2), [big, small])
    # Precomputed cliques give the same answer.
    assert_equal(nx.cliques_containing_node(G, 2, cliques=self.cl),
                 [big, small])
    # No node argument -> one entry per node.
    assert_equal(len(nx.cliques_containing_node(G)), 11)
def graph_iter(self, G, **kwargs):
    """Yield the attributed cliques around every node flagged with the shadow value.

    Keyword Args:
        shadow_val: value a node attribute must equal for the node to be used.
        attr_to_del: name of the node attribute compared with ``shadow_val``.

    Yields:
        dict with ``'subgraph'`` (the cliques containing the node, each one
        passed through ``add_node_attrs``) and ``'root_node'`` (the node).
    """
    sv = kwargs['shadow_val']
    for root_node in G.nodes_iter():
        if G.node[root_node][kwargs['attr_to_del']] != sv:
            continue
        cliques = [
            add_node_attrs(aclique_raw, G)
            for aclique_raw in nx.cliques_containing_node(G, nodes=root_node)
        ]
        yield {
            'subgraph': cliques,
            'root_node': root_node
        }
def tree_decomposition(input_graph):
    """Build a decomposition tree of ``input_graph`` by eliminating minimum-degree vertices.

    Works on a copy of the input. Each round takes the first vertex
    returned by ``sort_by_degree``, creates a tree node made of that vertex
    and its neighbours (a tuple), and removes one or more vertices from the
    working graph. ``tree_connectivity_dictionary`` maps each graph vertex
    to the tree nodes still waiting to be connected through it.

    Returns:
        nx.Graph whose nodes are tuples of graph vertices.
    """
    current_graph = input_graph.copy()
    decomposition_tree = nx.Graph()
    tree_connectivity_dictionary = dict()
    for graph_vertex in current_graph.nodes():
        tree_connectivity_dictionary[graph_vertex] = []

    while current_graph.order() > 0:
        nodes_sorted_by_degree = sort_by_degree(current_graph)
        # presumably the lowest-degree vertex comes first; verify sort_by_degree
        minimum_degree_vertex = nodes_sorted_by_degree[0][0]
        cliques_of_minimum_degree_vertex = nx.cliques_containing_node(
            current_graph, minimum_degree_vertex)
        number_of_cliques_containing_vertex = len(cliques_of_minimum_degree_vertex)
        # Materialise once: the neighbours are measured and iterated several
        # times below.
        minimum_degree_vertex_neighbors = list(
            current_graph.neighbors(minimum_degree_vertex))

        new_tree_vertex = tuple(
            [minimum_degree_vertex] + minimum_degree_vertex_neighbors)
        decomposition_tree.add_node(new_tree_vertex)

        if number_of_cliques_containing_vertex > 1:
            # Several maximal cliques: connect the neighbours pairwise and
            # remove only the chosen vertex.
            for additional_edge in make_pairs(minimum_degree_vertex_neighbors):
                current_graph.add_edge(additional_edge[0], additional_edge[1])
            toberemoved = [minimum_degree_vertex]
        else:
            # Also remove neighbours whose degree equals the neighbour count.
            toberemoved = [minimum_degree_vertex]
            number_of_clique_edges_per_vertex = len(minimum_degree_vertex_neighbors)
            for temp_vertex in minimum_degree_vertex_neighbors:
                if current_graph.degree(temp_vertex) == number_of_clique_edges_per_vertex:
                    toberemoved.append(temp_vertex)

        for graph_vertex in new_tree_vertex:
            if graph_vertex in toberemoved:
                # remove_node replaces the pre-1.0 networkx name delete_node.
                current_graph.remove_node(graph_vertex)
                tree_vertices_waiting = tree_connectivity_dictionary[graph_vertex]
                for tree_vertex_waiting in tree_vertices_waiting:
                    decomposition_tree.add_edge(new_tree_vertex, tree_vertex_waiting)
                # Iterate over a copy: the removals below can hit the very
                # list stored under graph_vertex, which used to skip entries.
                for tree_vertex_waiting in tree_vertices_waiting[:]:
                    common_graph_nodes_between_tree_vertices = list(
                        my_very_simple_tuple_intersection(
                            new_tree_vertex, tree_vertex_waiting))
                    for candidate in common_graph_nodes_between_tree_vertices:
                        waiting_on_candidate = tree_connectivity_dictionary[candidate]
                        # Guarded: the entry may already have been removed.
                        if tree_vertex_waiting in waiting_on_candidate:
                            waiting_on_candidate.remove(tree_vertex_waiting)
            else:
                # Vertex survives: the new tree node now waits on it.
                tree_connectivity_dictionary[graph_vertex].append(new_tree_vertex)
    return decomposition_tree
def test_cliques_containing_node(self):
    """Check ``cliques_containing_node`` for a single node, node lists and all nodes."""
    G = self.G
    big = [2, 6, 1, 3]
    small = [2, 6, 4]

    # Single node -> list of cliques; list of nodes -> dict keyed by node.
    assert nx.cliques_containing_node(G, 1) == [big]
    assert list(nx.cliques_containing_node(G, [1]).values()) == [[big]]

    per_node = nx.cliques_containing_node(G, [1, 2]).values()
    assert [sorted(found) for found in per_node] == [[big], [big, small]]

    result = {
        node: sorted(found)
        for node, found in nx.cliques_containing_node(G, [1, 2]).items()
    }
    assert result == {1: [big], 2: [big, small]}
    assert nx.cliques_containing_node(G, 1) == [big]

    # Clique order is not fixed, so accept either ordering.
    expected = [{2, 6, 1, 3}, {2, 6, 4}]
    acceptable = (expected, expected[::-1])
    answer = [set(c) for c in nx.cliques_containing_node(G, 2)]
    assert answer in acceptable
    answer = [set(c) for c in nx.cliques_containing_node(G, 2, cliques=self.cl)]
    assert answer in acceptable

    # No node argument -> one entry per node.
    assert len(nx.cliques_containing_node(G)) == 11
def create_group(session, graph, group_limit, group_size_limit):
    """Persist groups sampled from cliques of ``graph`` until at most one node is left.

    Repeatedly picks a random node, looks at the cliques containing it
    (largest first), samples a group from each clique and, when no member
    has reached ``group_limit`` memberships, stores a ``Group`` row and one
    ``Membership`` row per member through ``session``. Nodes that reach
    ``group_limit`` memberships, and nodes without any clique of more than
    one member, are removed from ``graph`` (the graph is mutated in place).

    Returns None.

    NOTE(review): the indentation below was reconstructed from a
    whitespace-collapsed source — confirm the nesting, in particular of the
    ``if n == node`` check and of the ``session.commit()`` calls.
    """
    # Number of groups each node has been put in so far.
    nodes = dict()
    while (len(graph) > 1):
        node = choice(graph.nodes())
        node_reached_group_limit = False
        # Cliques around the node that contain at least one other node.
        long_cliques = [
            c for c in nx.cliques_containing_node(graph, nodes=[node])[node]
            if len(c) > 1
        ]
        if len(long_cliques) == 0:
            graph.remove_node(node)
            continue
        cliques = sorted(long_cliques, key=lambda x: len(x), reverse=True)
        for clique in cliques:
            if len(clique) < 2:
                break
            # Group size is capped by both the clique and the configured limit.
            l = min(len(clique), group_size_limit)
            # The picked node is always a member; the rest is sampled from
            # the other clique members (the clique list is mutated here).
            clique.remove(node)
            group = sample(clique, l - 1) + [node]
            available = True
            for n in group:
                nodes.setdefault(n, 0)
                if nodes[n] == group_limit:
                    available = False
                    break
            if available:
                new_group = Group(group_limit=group_limit,
                                  group_size_limit=group_size_limit)
                session.add(new_group)
                session.commit()
                for n in group:
                    membership = Membership(group_id=new_group.id, node_id=n)
                    session.add(membership)
                    session.commit()
                    nodes[n] += 1
                    if nodes[n] == group_limit:
                        # A saturated node takes no further part.
                        graph.remove_node(n)
                        if n == node:
                            node_reached_group_limit = True
            if node_reached_group_limit:
                break
    return
def compute_cliques(self, node_id=None):  # pragma: no cover
    """
    Computes the maximal complete subgraphs (cliques) of this graph.

    When a node_id is given, only the cliques that contain that node
    are returned.

    Parameters
    ----------
    node_id : int
              Integer value for a given node

    Returns
    -------
    : list
      A list of lists of node ids, one inner list per clique
    """
    if node_id is None:
        found = nx.find_cliques(self)
    else:
        found = nx.cliques_containing_node(self, nodes=node_id)
    return list(found)
def test_cliques_containing_node(self):
    """Check ``cliques_containing_node`` for a single node, node lists and all nodes."""
    G = self.G
    big = [2, 6, 1, 3]
    small = [2, 6, 4]

    # Single node -> list of cliques; list of nodes -> dict keyed by node.
    assert_equal(nx.cliques_containing_node(G, 1), [big])
    assert_equal(list(nx.cliques_containing_node(G, [1]).values()), [[big]])

    per_node = nx.cliques_containing_node(G, [1, 2]).values()
    assert_equal([sorted(found) for found in per_node],
                 [[big], [big, small]])

    result = {
        node: sorted(found)
        for node, found in nx.cliques_containing_node(G, [1, 2]).items()
    }
    assert_equal(result, {1: [big], 2: [big, small]})
    assert_equal(nx.cliques_containing_node(G, 1), [big])

    # Clique order is not fixed, so accept either ordering.
    expected = [{2, 6, 1, 3}, {2, 6, 4}]
    acceptable = (expected, expected[::-1])
    answer = [set(c) for c in nx.cliques_containing_node(G, 2)]
    assert_in(answer, acceptable)
    answer = [set(c) for c in nx.cliques_containing_node(G, 2, cliques=self.cl)]
    assert_in(answer, acceptable)

    # No node argument -> one entry per node.
    assert_equal(len(nx.cliques_containing_node(G)), 11)
def iter_feature1(G, node1, node2):
    """Extract graph features for a pair of nodes.

    Returns a list holding, in order: the shortest path length and the
    number of shortest paths (``0, 0`` is appended when networkx reports no
    path), the two first-order and the two second-order neighbourhood
    overlap values from ``neighbor_overlap_orderK``, the node connectivity
    between the two nodes, and the result of ``in_same_cluster`` applied to
    the cliques containing ``node1`` and to ``node2``.
    """
    features = []

    # Shortest path length, then number of shortest paths.
    try:
        features.append(nx.shortest_path_length(G, node1, node2))
        features.append(len(list(nx.all_shortest_paths(G, node1, node2))))
    except nx.NetworkXNoPath:
        features += [0, 0]

    # First and second order neighbourhood overlap.
    for order in (1, 2):
        overlap_a, overlap_b = neighbor_overlap_orderK(G, node1, node2, order)
        features += [overlap_a, overlap_b]

    # (average neighbour degree is intentionally not part of the features)

    # Connectivity between the two nodes.
    features.append(nx.node_connectivity(G, node1, node2))

    # Whether the nodes are in the same cluster.
    cliques_of_first = nx.cliques_containing_node(G, node1)
    features.append(in_same_cluster(cliques_of_first, node2))
    return features
def dist_graph(pipe: Pipe, threshold: float, layout: str, cliques: bool):
    """Build and plot a threshold graph from a pairwise distance matrix.

    Presumes ``pipe.matrix`` holds pairwise distances (square, same labels
    on index and columns). Distances are rescaled to similarity weights
    ``1 - (d - min) / (max - min)``, every pair of distinct samples gets an
    edge, and edges whose weight is below ``threshold`` are dropped.

    Args:
        pipe: object whose ``matrix`` attribute is the distance matrix.
        threshold: minimum weight to keep an edge; ``-1`` selects a value
            just below the median weight.
        layout: prefix of a networkx layout function (``<layout>_layout``);
            unknown names fall back to ``fruchterman_reingold_layout``.
        cliques: when true, the maximal cliques containing each node are
            computed and passed to the figure builder.
    """
    import networkx as nx

    assert pipe.matrix.index.values.shape == pipe.matrix.columns.values.shape, "call pdist first"
    samples = pipe.matrix.index.values
    weights = pipe.matrix.values
    n_samples = samples.shape[0]
    max_w = np.max(weights)
    # Adding max_w on the diagonal keeps the self-distances out of the minimum.
    min_w = np.min(weights + np.eye(n_samples) * max_w)

    graph = nx.Graph()
    weight_values = []
    for (i, sa), (j, sb) in product(enumerate(samples), enumerate(samples)):
        if i != j:
            w = 1 - (weights[i, j] - min_w) / (max_w - min_w)
            graph.add_edge(sa, sb, weight=w)
            weight_values.append(w)
    weight_values = np.array(weight_values)

    if threshold == -1:
        threshold = np.median(weight_values) - np.nextafter(0., 1)
    print(np.min(weight_values), np.median(weight_values), np.max(weight_values))

    under_threshold_edges = [(u, v) for (u, v, d) in graph.edges(data=True)
                             if d['weight'] < threshold]
    graph.remove_edges_from(under_threshold_edges)

    if cliques:
        all_cliques = list(nx.find_cliques(graph))
        node_cliques = nx.cliques_containing_node(graph, list(graph.nodes()), all_cliques)
    else:
        node_cliques = defaultdict(list)

    # The fallback must be the function itself: a string default made an
    # unknown layout name fail with "'str' object is not callable".
    layout_fn = getattr(nx, layout + "_layout", nx.fruchterman_reingold_layout)
    pos = layout_fn(graph, weight="weight")
    fig = _mk_networkx_figure(graph, pos, use_weights=True, node_cliques=node_cliques)
    oplot(fig)
def get_triangles(ring, G):
    """Return the cliques of ``G`` that lie entirely inside the vertex set ``ring``.

    The cliques containing each ring vertex are inspected; a clique is kept
    once (the same clique is reached from each of its vertices) when all of
    its members belong to ``ring``. The kept cliques are returned as the
    node lists produced by networkx, in discovery order.
    """
    cliques_by_vertex = nx.cliques_containing_node(G, list(ring))
    allowed = set(ring)
    kept = []
    kept_as_sets = []
    for vertex in cliques_by_vertex.keys():
        # all cliques in which this vertex is present
        for members in cliques_by_vertex[vertex]:
            member_set = set(members)
            if not member_set <= allowed:
                continue
            if member_set in kept_as_sets:
                continue
            kept_as_sets.append(set(members))
            kept.append(members)
    return kept
# Script fragment (Python 2 print statements): prints three per-node clique
# statistics of graph G, then starts writing a summary CSV.
# NOTE(review): layout reconstructed from a whitespace-collapsed source; the
# ``with`` block presumably continues past the last statement shown.
print "-------------------------------------"
print "Compute size of largest maximal clique containing a given node"
print "-------------------------------------"
maximalCliqueSizePerNode = nx.node_clique_number(G)
print maximalCliqueSizePerNode
print "-------------------------------------"
print "Compute number of maximal cliques for each node"
print "-------------------------------------"
noOfMaximalCliquesPerNode = nx.number_of_cliques(G)
print noOfMaximalCliquesPerNode
print "-------------------------------------"
print "Compute list of cliques containing a given node"
print "-------------------------------------"
lcliques = nx.cliques_containing_node(G)
print lcliques
print "-------------------------------------"
print "Writing data into global file"
print "-------------------------------------"
globalCliqueFile = 'data/globalCliqueFile.csv'
# Append when the file already exists, otherwise create it.
mode = '';
if os.path.isfile(globalCliqueFile):
    mode = 'a';
else:
    mode = 'wb';
with open(globalCliqueFile, mode) as csvfile:
    sw = csv.writer(csvfile, delimiter=',')
    # Column names of the summary file.
    data = ['cuisine', 'edgeWtThreshold', 'NumberOfCliques', 'Size of Largest Maximal Clique'];
# Notebook export fragment (Python 2 print statements).
# NOTE(review): layout reconstructed from a whitespace-collapsed source.
e_c = nx.eigenvector_centrality(G)
print e_c

# ### Clique

# In[36]:

nx.draw(G, with_labels=True)
# enumerate_all_cliques yields every clique, not only the maximal ones.
cliques = list(nx.enumerate_all_cliques(G))
print cliques
# Print the cliques grouped by size, from 1 up to the largest size found.
for i in range(1, len(max(cliques, key=len)) + 1):
    print i, "clique:", [x for x in cliques if len(x) == i]

# In[37]:

# Cliques that contain node 1.
node_cliques = nx.cliques_containing_node(G, nodes=1)
print(node_cliques)

# ### Triangle, Transitivity, Clustering Coefficient

# In[38]:

print nx.triangles(G)
print nx.transitivity(G)
print nx.clustering(G)

# ## Community
# https://github.com/taynaud/python-louvain

# In[39]:
# lvl2.append(graphs_len[i]) print 'Η λίστα των μεγεθών των κλικών είναι:' # print 'The list of clique sizes is:' print lvl2 print str(" ") print 'Ο αριθμός κλίκας (το μέγεθος της μεγαλύτερης κλίκας) του G είναι:', nx.graph_clique_number( G) # print 'The clique number (size of the largest clique) for G is:', nx.graph_clique_number(G) # print sorted(nx.connected_components(G), key = len, reverse=True) print str(" ") print 'Το λεξικό των κλικών που περιέχουν κάθε κόμβο είναι:' # print 'The dictionary of the lists of cliques containing each node:' print nx.cliques_containing_node(G) print str(" ") print 'Το λεξικό του πλήθους κλικών που περιέχουν κάθε κόμβο είναι:' # print 'The dictionary of the numbers of maximal cliques for each node:' print nx.number_of_cliques(G) print str(" ") print 'Το λεξικό του μεγέθους των μεγαλύτερων κλικών που περιέχουν κάθε κόμβο είναι:' # print 'The dictionary of the sizes of the largest maximal cliques containing each given node:' print nx.node_clique_number(G) print str(" ") maxclique = [ clq for clq in nx.find_cliques(G) if len(clq) == nx.graph_clique_number(G) ]
def create_clique_top_node(graphml_filename):
    """Write to CSV the cliques around the top node of a GraphML file.

    The graph is loaded with ``read_graphml``, its top node is obtained from
    ``get_top_node`` and the cliques found for it are handed to
    ``write_cliques_to_csv``.
    """
    graph = read_graphml(graphml_filename)
    top = get_top_node(graph)
    print('before finding cliques for ' + top[0])
    # NOTE(review): ``top`` is indexed above yet passed whole as the node
    # here — confirm that ``top`` itself, not ``top[0]``, is the graph node.
    found = nx.cliques_containing_node(graph, nodes=[top])
    write_cliques_to_csv(cliques=found)
diameter # Center # DONT EXECUTE--TAKES TOO LONG center = nx.center(friendship_graph) center # Eccentricity # DONT EXECUTE--TAKES TOO LONG eccentricity = nx.eccentricity(friendship_graph) eccentricity # Clique clique = nx.max_clique(friendship_graph) threezeroeight_cliques = nx.cliques_containing_node(friendship_graph,308) len(threezeroeight_cliques) # Transitvity = 0.02348 transitivity = nx.transitivity(friendship_graph) # Return density of graph nx.density(friendship_graph) # Return Connectivity of a graph # DONT EXECUTE--TAKES TOO LONG connectivity_dict = nx.node_connectivity(friendship_graph) connectivity = nx.node_connectivity(friendship_graph) # Shortest Path
def main():
    """Annotate the pickled supply networks of every period with node and edge metrics.

    For each period index, loads the per-period network and the accumulated
    network from ``../Results``, and for both stores on every node its
    degree, clustering coefficient, betweenness and largest clique size,
    stores on every edge its edge betweenness, calls ``calculate_kshell``
    and overwrites the pickle with the annotated graph. Periods whose
    pickle cannot be opened are skipped.

    NOTE(review): the indentation below was reconstructed from a
    whitespace-collapsed source — confirm the nesting against the original.
    """
    slicing="monthly" #"monthly" # or "yearly"
    flag_cohort=1 # to only consider firms from the cohort, or every firm
    string_cohort=""
    if flag_cohort==1:
        string_cohort="_cohort_"
    num_periods=253
    ii=0
    # ii is incremented before use, so the periods visited are 1 .. num_periods+1.
    while ii <= num_periods:
        ii+=1
        try:
            #### read the pickled networks
            #network_period="../Results/Simplified_supply_network_slicing_"+str(slicing)+"_period_"+str(ii)+"_.pickle"
            network_period="../Results/Simplified_supply_network_slicing_"+str(slicing)+"_period_"+str(ii)+string_cohort+".pickle"
            G_period = pickle.load(open(network_period, 'rb'))
            N_period = len(G_period.nodes())
            #network_accumulate_year="../Results/Simplified_supply_network_yearly_acummlate_until_period_"+str(ii)+"_.pickle"
            network_accumulate_year="../Results/Simplified_supply_network_yearly_acummlate_until_period_"+str(ii)+string_cohort+".pickle"
            G = pickle.load(open(network_accumulate_year, 'rb'))
            print ii,"N",N_period
            if N_period >0: # to avoid the couple of empty networks around period 120
                # Same processing for the period network and the accumulated one.
                list_networks=[(network_period,G_period), (network_accumulate_year,G)]
                for tupla in list_networks:
                    Gi= tupla[1]
                    network_name= tupla[0]
                    dict_clustering=nx.clustering(Gi)
                    dict_betweenness_nodes=nx.betweenness_centrality(Gi)
                    max_k=0
                    list_k=[]
                    for node in Gi.nodes():
                        k=Gi.degree(node)
                        Gi.node[node]["degree"]=k
                        list_k.append(k)
                        # running maximum degree seen so far
                        max_k=max(list_k)
                        Gi.node[node]["CC"]=dict_clustering[node]
                        Gi.node[node]["betweeness"]= dict_betweenness_nodes[node]
                    dict_betweenness_edges=nx.edge_betweenness_centrality(Gi, normalized=True, weight=None) # returns a dictionary of edges (tuples as keys) with betweenness centrality as the value.
                    ### the edges' betweenness could also be calculated taking their weight into account
                    for edge in Gi.edges():
                        try:
                            Gi.edge[edge[0]][edge[1]]["link_betweeness"]=dict_betweenness_edges[edge]
                        # NOTE(review): a missing dictionary key raises
                        # KeyError, which this handler does not catch — confirm
                        # TypeError is the intended exception.
                        except TypeError:
                            Gi.edge[edge[0]][edge[1]]["link_betweeness"]="NA"
                            print "edge",edge, "not found"
                    for node in Gi.nodes():
                        # Size of the largest clique containing the node (at least 1).
                        maximo=1
                        lista=nx.cliques_containing_node(Gi, node) #list of lists, e.g.: [[207925, 203592], [207925, 10500761], [207925, 200554], [207925, 202587]]
                        for elem in lista:
                            if len(elem) > maximo:
                                maximo=len(elem)
                        Gi.node[node]['max_clique_size']=maximo
                    calculate_kshell(Gi, max_k)
                    ######## (over)write the pickle with the network including the new attributes
                    pickle.dump(Gi, open(network_name, 'wb'))
                    print " written", network_name
        except IOError:
            pass # if network pickle not found
# Script fragment: builds the small graph MNDR, times repeated calls to
# cliques_containing_node on it and plots the distribution of the timings.
# NOTE(review): layout reconstructed from a whitespace-collapsed source.
MNDR.add_nodes_from(["1","5"])
MNDR.add_edges_from([("1","2"),("2","3"),("2","4"),("2","5"),("3","5"),("4","5"),("3","5"), ("4","5"),("5","5")])
# Node "5" is drawn in yellow, every other node in red.
color_map = []
for node in MNDR:
    if (node == "5"):
        color_map.append('yellow')
    else:
        color_map.append('red')
# 30 replicas, each timing 100000 calls.
replicas_5 = []
for j in range(30):
    start_time = time()
    for i in range (100000):
        cliques = nx.cliques_containing_node(MNDR)
    elapsed_time = time() - start_time
    replicas_5.append(elapsed_time)
print(replicas_5)
print(len(replicas_5))
# Shapiro-Wilk normality test on the timings.
normality_test=stats.shapiro(replicas_5)
print(normality_test)
# NOTE(review): the lines below read ``replicas_f5`` while the timings above
# are stored in ``replicas_5`` — confirm ``replicas_f5`` is defined elsewhere.
hist, bin_edges=np.histogram(replicas_f5,density=True)
first_edge, last_edge = np.min(replicas_f5),np.max(replicas_f5)
# Ten equal-width bins spanning the observed range.
n_equal_bins = 10
bin_edges = np.linspace(start=first_edge, stop=last_edge,num=n_equal_bins + 1, endpoint=True)
plt.hist(replicas_f5, bins=bin_edges, rwidth= 0.8, color= 'orange')
def create_partial_ktrees(self, n, k, p): """ Generates partial k-trees using the parametric model: (n,k,p) """ # Returns a caveman graph of l cliques of size k. G = nx.caveman_graph(1, k + 1) # Insert new nodes for i in range(0, n - k - 1): G.add_node(i + k + 1) adjacent_nodes = set() while len(adjacent_nodes) < k: cliques_of_G = nx.cliques_containing_node(G) temp = int( np.round( np.random.uniform(low=0.0, high=len(cliques_of_G) - 1, size=None))) temp_list = cliques_of_G.get(temp) temp2 = int( np.round( np.random.uniform(low=0.0, high=len(temp_list) - 1, size=None))) temp_list = temp_list[temp2] if len(temp_list) >= k: while (len(temp_list) != k): del temp_list[0] [adjacent_nodes.add(i) for i in temp_list] for j in list(adjacent_nodes): G.add_edge(i + k + 1, j) a = set() b = set() pairs = set() # Remove p percent edges from the k-tree uniformly at random if p > 0: num = G.number_of_edges() num = int(num * (p / 100)) # Finding the edges to remove without replacement while len(pairs) < num: temp1 = 0 temp2 = 0 while temp1 == temp2: temp1 = int( np.round(np.random.uniform(low=0.0, high=n, size=None))) temp2 = int( np.round(np.random.uniform(low=0.0, high=n, size=None))) temp_pairs = set() temp_pairs.add(temp1) temp_pairs.add(temp2) if temp_pairs not in pairs: pairs.add(frozenset(temp_pairs)) pairs = list(pairs) pairs_new = [] [pairs_new.append(list(i)) for i in pairs] for i in range(0, num): if G.has_edge(pairs_new[i][0], pairs_new[i][1]): G.remove_edge(pairs_new[i][0], pairs_new[i][1]) return G
def main(): path = "../Data/95_05NYCgamentdata/" initial_year = 1985 final_year = 2005 first_day = dt.datetime(1985, 1, 1) y = initial_year flag_hist = "NO" flag_network_metrics = "NO" flag_remove_selfloops = "YES" string_filename_loops = "" if flag_remove_selfloops == "YES": string_filename_loops = "_no_self_loops" string_filename = "" if flag_network_metrics == "NO": string_filename = "_no_network_metrics" name1 = "../Results/Time_evol_num_transactions.dat" file1 = open(name1, 'wt') file1.close() name2 = "../Results/Time_evol_num_firms.dat" file2 = open(name2, 'wt') file2.close() name3 = "../Results/Time_evol_num_neg_transactions.dat" file3 = open(name3, 'wt') file3.close() name4 = "../Results/Time_evol_num_self_transactions.dat" file4 = open(name4, 'wt') file4.close() name5 = "../Results/Time_evol_num_self_neg_transactions.dat" file5 = open(name5, 'wt') file5.close() master_name = "../Results/Master_file_transactions" + string_filename + string_filename_loops + ".csv" file_master = open(master_name, 'wt') print >> file_master, "Mi Cj Mi_Cj tot_adj_gross_ij pos_adj_gross_ij neg_adj_gross_ij pos_adj_gross_i neg_adj_gross_i pos_adj_gross_j neg_adj_gross_j P_inf P_inf_previous_year Cumulat_num_inf_years frac_previous_inf_years year ki kj K_nn_i K_nn_j HHIi_as_manuf HHIi_as_contr HHIj_as_manuf HHIj_as_contr betweenness_i betweenness_j link_betweenness_ij max_clique_i max_clique_j kshell_i kshell_j num_manuf_i num_contr_i num_manuf_j num_contr_j num_transact_ij num_transact_i num_transact_j amount_pos_self_trans_i amount_pos_self_trans_j amount_neg_self_trans_i amount_neg_self_trans_j num_pos_self_trans_i num_pos_self_trans_j num_neg_self_trans_i num_neg_self_trans_j fract_pos_bussiness_of_M_with_C fract_pos_bussiness_of_C_with_M degree_asymmetry_ij business_asymmetry_ij size_tot_errors_ij" file_master.close() G = nx.Graph() print print list_periods = [] list_firm_ids = [] cont_transactions = 0. cont_neg_transactions = 0. cont_self_transactions = 0. 
cont_self_neg_transactions = 0. dict_firm_id_active_years = {} dict_firm_total_trans_volum = {} dict_firm_num_pos_trans = {} dict_firm_num_neg_trans = {} dict_tuple_link_cumulat_previous_Pinf = {} dict_tuple_link_fract_previous_inf_years = {} dict_tuple_link_Pinf_previous_year = {} dict_manuf_dict_contr_amounts = { } # for each manufact., dict of its contractors and total amounts dict_contr_dict_manuf_amounts = { } # for each contract., dict of its manuf. and total amounts dict_link_num_pos_trans = {} dict_link_num_neg_trans = {} list_neg_adj_gross = [] list_pos_adj_gross = [] list_tuplas = [] while y <= final_year: list_neg_adj_gross_year = [] list_pos_adj_gross_year = [] list_firm_ids_year = [] list_tuplas_year = [] dict_firm_total_trans_volum_year = {} dict_firm_tot_pos_trans_year = {} dict_firm_tot_neg_trans_year = {} dict_firm_num_pos_trans_year = {} dict_firm_num_neg_trans_year = {} dict_link_num_pos_trans_year = {} dict_link_num_neg_trans_year = {} dict_firm_amount_pos_self_trans_year = {} dict_firm_amount_neg_self_trans_year = {} dict_firm_num_pos_self_trans_year = {} dict_firm_num_neg_self_trans_year = {} dict_manuf_dict_contr_amounts_year = { } # for each manufact., dict of its contractors and total amounts dict_contr_dict_manuf_amounts_year = { } # for each contract., dict of its manuf. and total amounts cont_transactions_year = 0. cont_neg_transactions_year = 0. cont_self_transactions_year = 0. cont_self_neg_transactions_year = 0. G_year = nx.Graph() list_manuf_year = [] list_contr_year = [] list_non_self_contractors = [] ################## ####### input datafile: name0 = "fhistory" + str(y) + ".csv" print "\nreading: ", path + name0, "......." 
#### paidbyfi,paidforf,periodfr,periodto,adjgr,gross,net,caf,liqdmg,cafper,rateper,ratecode cont = 1 csvfile = open(path + name0, 'rb') reader = csv.reader(csvfile, delimiter=',', quotechar='"') next(reader, None) # to skip the header for list_row in reader: cont += 1 try: # some lines are missing the contractor or manufacturer: skip manufacturer = int(list_row[0]) ##paidbyfirm contractor = int(list_row[1]) ## paidforfirm list_manuf_year.append(manufacturer) list_contr_year.append(contractor) cont_transactions += 1 cont_transactions_year += 1 ############## for now i deal with integers Dollars !! (easier for histograms) adj_gross = int(round(float((list_row[4])))) flag_ignore_row = 0 if manufacturer == contractor: cont_self_transactions += 1. cont_self_transactions_year += 1. if flag_remove_selfloops == "YES": flag_ignore_row = 1 ####### if it is a self-transaction, i only record this (no network variables nor HHI etc) if adj_gross < 0.: cont_self_neg_transactions += 1. cont_self_neg_transactions_year += 1. try: dict_firm_amount_neg_self_trans_year[ manufacturer] += adj_gross except KeyError: dict_firm_amount_neg_self_trans_year[ manufacturer] = adj_gross try: dict_firm_num_neg_self_trans_year[ manufacturer] += 1. except KeyError: dict_firm_num_neg_self_trans_year[ manufacturer] = 1. else: try: dict_firm_amount_pos_self_trans_year[ manufacturer] += adj_gross except KeyError: dict_firm_amount_pos_self_trans_year[ manufacturer] = adj_gross try: dict_firm_num_pos_self_trans_year[ manufacturer] += 1. except KeyError: dict_firm_num_pos_self_trans_year[ manufacturer] = 1. 
else: list_non_self_contractors.append(contractor) if flag_ignore_row == 0: # in general i do not count the self-transactions (for network metrics nor HHI etc) tupla_link = (manufacturer, contractor) list_tuplas_year.append(tupla_link) list_tuplas.append(tupla_link) list_firm_ids.append(manufacturer) list_firm_ids.append(contractor) list_firm_ids_year.append(manufacturer) list_firm_ids_year.append(contractor) ########### initial date for transaction period from_date = list_row[ 2] # format examples 040185 #010185 if len(from_date) < 6: from_date = "0" + from_date # when the date is 40185 instead of 040185 month = int(from_date[:2]) day = int(from_date[-4:-2]) year = int(from_date[-2:]) if year > 80: year += 1900 else: year += 2000 try: initial_date = dt.datetime(year, month, day) except ValueError: # there are a bunch of incorrect dates!!! 043185 063185 113185 023185 day = 1 month += 1 initial_date = dt.datetime(year, month, day) ########### final date for transaction period to_date = list_row[3] if len(to_date ) < 6: # when the date is 40185 instead of 040185 to_date = "0" + to_date month = int(to_date[:2]) day = int(to_date[-4:-2]) year = int(to_date[-2:]) if year > 80: year += 1900 else: year += 2000 try: final_date = dt.datetime(year, month, day) except ValueError: # there are a bunch of incorrect dates!!! 043185 063185 113185 023185 day = 1 month += 1 final_date = dt.datetime(year, month, day) period = (final_date - initial_date).days if period < 0: old_initial = initial_date old_final = final_date # some time periods have interved TO and FROM dates! 
initial_date = final_date final_date = old_initial period = (final_date - initial_date).days list_periods.append(period) ########### list of active years for firms try: dict_firm_id_active_years[manufacturer].append(year) except KeyError: dict_firm_id_active_years[manufacturer] = [] dict_firm_id_active_years[manufacturer].append(year) if manufacturer != contractor: try: dict_firm_id_active_years[contractor].append(year) except KeyError: dict_firm_id_active_years[contractor] = [] dict_firm_id_active_years[contractor].append(year) ########### i need to initialize dicts try: dict_firm_num_neg_trans_year[manufacturer] except KeyError: dict_firm_num_neg_trans_year[manufacturer] = 0. try: dict_firm_num_neg_trans_year[contractor] except KeyError: dict_firm_num_neg_trans_year[contractor] = 0. try: dict_firm_num_neg_trans[manufacturer] except KeyError: dict_firm_num_neg_trans[manufacturer] = 0. try: dict_firm_num_neg_trans[contractor] except KeyError: dict_firm_num_neg_trans[contractor] = 0. try: dict_firm_num_pos_trans_year[manufacturer] except KeyError: dict_firm_num_pos_trans_year[manufacturer] = 0. try: dict_firm_num_pos_trans_year[contractor] except KeyError: dict_firm_num_pos_trans_year[contractor] = 0. try: dict_firm_num_pos_trans[manufacturer] except KeyError: dict_firm_num_pos_trans[manufacturer] = 0. try: dict_firm_num_pos_trans[contractor] except KeyError: dict_firm_num_pos_trans[contractor] = 0. ######### ########### initialize the same for links try: dict_link_num_pos_trans[tupla_link] except KeyError: dict_link_num_pos_trans[tupla_link] = 0. try: dict_link_num_neg_trans[tupla_link] except KeyError: dict_link_num_neg_trans[tupla_link] = 0. try: dict_link_num_pos_trans_year[tupla_link] except KeyError: dict_link_num_pos_trans_year[tupla_link] = 0. try: dict_link_num_neg_trans_year[tupla_link] except KeyError: dict_link_num_neg_trans_year[tupla_link] = 0. ############ ########OJO! NOT SURE ABOUT THIS!! do i want total, or only positive? 
######## for the HHI index of manuf. and contr. # HHI is a measure of the size of firms in an industry, and an indicator of the amount of competition among them. Bounded by: 1/N (equally distributed industry), 1 (total monopoly). try: dict_manuf_dict_contr_amounts[manufacturer] except KeyError: dict_manuf_dict_contr_amounts[manufacturer] = {} try: dict_manuf_dict_contr_amounts[manufacturer][contractor] except KeyError: dict_manuf_dict_contr_amounts[manufacturer][ contractor] = 0. dict_manuf_dict_contr_amounts[manufacturer][ contractor] += adj_gross try: dict_manuf_dict_contr_amounts_year[manufacturer] except KeyError: dict_manuf_dict_contr_amounts_year[manufacturer] = {} try: dict_manuf_dict_contr_amounts_year[manufacturer][ contractor] except KeyError: dict_manuf_dict_contr_amounts_year[manufacturer][ contractor] = 0. dict_manuf_dict_contr_amounts_year[manufacturer][ contractor] += adj_gross try: dict_contr_dict_manuf_amounts[contractor] except KeyError: dict_contr_dict_manuf_amounts[contractor] = {} try: dict_contr_dict_manuf_amounts[contractor][manufacturer] except KeyError: dict_contr_dict_manuf_amounts[contractor][ manufacturer] = 0. dict_contr_dict_manuf_amounts[contractor][ manufacturer] += adj_gross try: dict_contr_dict_manuf_amounts_year[contractor] except KeyError: dict_contr_dict_manuf_amounts_year[contractor] = {} try: dict_contr_dict_manuf_amounts_year[contractor][ manufacturer] except KeyError: dict_contr_dict_manuf_amounts_year[contractor][ manufacturer] = 0. dict_contr_dict_manuf_amounts_year[contractor][ manufacturer] += adj_gross ########## total volume of (net) transactions try: dict_firm_total_trans_volum[manufacturer] += adj_gross except KeyError: dict_firm_total_trans_volum[manufacturer] = 0. dict_firm_total_trans_volum[manufacturer] += adj_gross try: dict_firm_total_trans_volum[contractor] += adj_gross except KeyError: dict_firm_total_trans_volum[contractor] = 0. 
dict_firm_total_trans_volum[contractor] += adj_gross ########## volume of (net) transactions YEARLY try: dict_firm_total_trans_volum_year[ manufacturer] += adj_gross except KeyError: dict_firm_total_trans_volum_year[manufacturer] = 0. dict_firm_total_trans_volum_year[ manufacturer] += adj_gross try: dict_firm_total_trans_volum_year[ contractor] += adj_gross except KeyError: dict_firm_total_trans_volum_year[contractor] = 0. dict_firm_total_trans_volum_year[ contractor] += adj_gross ########## if adj_gross < 0: list_neg_adj_gross.append(-1 * adj_gross) list_neg_adj_gross_year.append(-1 * adj_gross) cont_neg_transactions += 1 cont_neg_transactions_year += 1 ##### num of neg. transactions YEARLY dict_firm_num_neg_trans_year[manufacturer] += 1. dict_firm_num_neg_trans_year[contractor] += 1. ##### tot num of neg. transactions dict_firm_num_neg_trans[manufacturer] += 1. dict_firm_num_neg_trans[contractor] += 1. #### same for the link dict_link_num_neg_trans_year[tupla_link] += 1. dict_link_num_neg_trans[tupla_link] += 1. G_year.add_node(manufacturer) G_year.add_node(contractor) G.add_node(manufacturer) G.add_node(contractor) try: G.edge[manufacturer][contractor][ "neg_weight"] += adj_gross except KeyError: G.add_edge(manufacturer, contractor, neg_weight=adj_gross) try: G_year.edge[manufacturer][contractor][ "neg_weight"] += adj_gross except KeyError: G_year.add_edge(manufacturer, contractor, neg_weight=adj_gross) #### add up to the total neg. yearly amount for each firm try: dict_firm_tot_neg_trans_year[ manufacturer] += adj_gross except KeyError: dict_firm_tot_neg_trans_year[manufacturer] = 0. dict_firm_tot_neg_trans_year[ manufacturer] += adj_gross try: dict_firm_tot_neg_trans_year[ contractor] += adj_gross except KeyError: dict_firm_tot_neg_trans_year[contractor] = 0. dict_firm_tot_neg_trans_year[ contractor] += adj_gross else: list_pos_adj_gross.append(adj_gross) list_pos_adj_gross_year.append(adj_gross) ##### num of posit. 
transactions YEARLY dict_firm_num_pos_trans_year[manufacturer] += 1. dict_firm_num_pos_trans_year[contractor] += 1. ##### tot. num of posit. transactions dict_firm_num_pos_trans[manufacturer] += 1. dict_firm_num_pos_trans[contractor] += 1. #### same for the link dict_link_num_pos_trans_year[tupla_link] += 1. dict_link_num_pos_trans[tupla_link] += 1. try: G.edge[manufacturer][contractor][ "pos_weight"] += adj_gross except KeyError: G.add_edge(manufacturer, contractor, pos_weight=adj_gross) try: G_year.edge[manufacturer][contractor][ "pos_weight"] += adj_gross except KeyError: G_year.add_edge(manufacturer, contractor, pos_weight=adj_gross) #### add up to the total neg. yearly amount for each firm try: dict_firm_tot_pos_trans_year[ manufacturer] += adj_gross except KeyError: dict_firm_tot_pos_trans_year[manufacturer] = 0. dict_firm_tot_pos_trans_year[ manufacturer] += adj_gross try: dict_firm_tot_pos_trans_year[ contractor] += adj_gross except KeyError: dict_firm_tot_pos_trans_year[contractor] = 0. 
dict_firm_tot_pos_trans_year[ contractor] += adj_gross except ValueError: pass # some lines (very rare, one single instance) are missing the contractor or manufacturer ############################# end of loop to read year file ########################### if flag_hist == "YES": try: name_h = "../Results/histogram_pos_adj_gross_values_years_" + str( y) + ".dat" histograma_gral.histogram(list_pos_adj_gross_year, name_h) except: pass try: name_h = "../Results/histogram_neg_adj_gross_values_years_" + str( y) + ".dat" histograma_gral.histogram(list_neg_adj_gross_year, name_h) except: pass for link in dict_link_num_pos_trans_year: G_year[link[0]][ link[1]]['num_pos_trans'] = dict_link_num_pos_trans_year[link] for link in dict_link_num_neg_trans_year: G_year[link[0]][ link[1]]['num_neg_trans'] = dict_link_num_neg_trans_year[link] G_year[link[0]][link[1]][ 'fract_neg_trans'] = dict_link_num_neg_trans_year[link] / ( dict_link_num_neg_trans_year[link] + dict_link_num_pos_trans_year[link]) if G_year[link[0]][link[1]]['num_neg_trans'] == 0.: G_year[link[0]][link[1]]['neg_weight'] = 0. if G_year[link[0]][link[1]]['num_pos_trans'] == 0.: G_year[link[0]][link[1]]['pos_weight'] = 0. for firm in G_year.nodes(): G_year.node[firm][ 'vol_transct'] = dict_firm_total_trans_volum_year[firm] G_year.node[firm][ 'fract_neg_transct'] = dict_firm_num_neg_trans_year[firm] / ( dict_firm_num_neg_trans_year[firm] + dict_firm_num_pos_trans_year[firm]) G_year.node[firm]['num_transact'] = dict_firm_num_neg_trans_year[ firm] + dict_firm_num_pos_trans_year[firm] for firm in dict_firm_tot_pos_trans_year: G_year.node[firm][ 'vol_pos_transct'] = dict_firm_tot_pos_trans_year[firm] for firm in dict_firm_tot_neg_trans_year: G_year.node[firm][ 'vol_neg_transct'] = dict_firm_tot_neg_trans_year[firm] ###### fill in the gaps for those firms without positive or neg. transactions for node in G_year.nodes(): try: G_year.node[node]['vol_pos_transct'] except KeyError: G_year.node[node]['vol_pos_transct'] = 0. 
try: G_year.node[node]['vol_neg_transct'] except KeyError: G_year.node[node]['vol_neg_transct'] = 0. print " year", y print " # firms:", len(set(list_firm_ids_year)) print " # manufacturers:", len(set(list_manuf_year)) print " # contractors", len( set(list_contr_year)), "(firms can act as both in general)" print " # non-self contractors", len(set(list_non_self_contractors)) print " # transactions:", int(cont_transactions_year) print " # negative transactions:", int( cont_neg_transactions_year ), " ", cont_neg_transactions_year / cont_transactions_year * 100., "%" print " # self-transactions:", int( cont_self_transactions_year ), " ", cont_self_transactions_year / cont_transactions_year * 100., "%" print " # self-neg-transactions:", int( cont_self_neg_transactions_year ), " ", cont_self_neg_transactions_year / cont_transactions_year * 100., "%" print " # unique tuples:", len(set(list_tuplas_year)) print "row count:", cont - 1 file1 = open(name1, 'at') print >> file1, y, int(cont_transactions_year) file1.close() file2 = open(name2, 'at') print >> file2, y, len(set(list_firm_ids_year)) file2.close() file3 = open(name3, 'at') print >> file3, y, int( cont_neg_transactions_year ), cont_neg_transactions_year / cont_transactions_year * 100. file3.close() file4 = open(name4, 'at') print >> file4, y, int( cont_self_transactions_year ), cont_self_transactions_year / cont_transactions_year * 100. file4.close() print "# manuf. in dict", len(dict_manuf_dict_contr_amounts_year) print "# contr. in dict", len(dict_contr_dict_manuf_amounts_year) ######## HHI index as manufacturers and contractors for node in G_year.nodes(): G_year.node[node]['num_manuf'] = 0 G_year.node[node]['num_contractors'] = 0 for manufacturer in dict_manuf_dict_contr_amounts_year: lista = list( dict_manuf_dict_contr_amounts_year[manufacturer].values()) HHI = Herfindahl_index.calculate_HHI( lista) ## tuple (H, H_normalized) !!!! 
G_year.node[manufacturer]['HHI_as_manuf'] = HHI[0] G_year.node[manufacturer]['num_contractors'] = len(lista) for contractor in dict_contr_dict_manuf_amounts_year: lista = list( dict_contr_dict_manuf_amounts_year[contractor].values()) HHI = Herfindahl_index.calculate_HHI( lista) ## tuple (H, H_normalized) !!!! G_year.node[contractor]['HHI_as_contr'] = HHI[0] G_year.node[contractor]['num_manuf'] = len(lista) ################ i add topological attributes to the nodes ############################# print "calculating network metrics....." print " CC..." if flag_network_metrics == "YES": dict_clustering = nx.clustering(G_year) print " node betweenness..." if flag_network_metrics == "YES": dict_betweenness_nodes = nx.betweenness_centrality(G_year) list_k = [] for node in G_year.nodes(): k = G_year.degree(node) G_year.node[node]["degree"] = k list_k.append(k) if flag_network_metrics == "YES": G_year.node[node]["CC"] = dict_clustering[node] G_year.node[node]["betweeness"] = dict_betweenness_nodes[node] else: G_year.node[node]["CC"] = 0. G_year.node[node]["betweeness"] = 0. max_k = max(list_k) print " edge betweenness..." if flag_network_metrics == "YES": dict_betweenness_edges = nx.edge_betweenness_centrality( G_year, normalized=True, weight=None ) # it returns dictionary of edges (tuplas as keys) with betweenness centrality as the value. ### i can also calculate the edges' betweenness taking into account their weight!! for edge in G_year.edges(): # print edge try: if flag_network_metrics == "YES": G_year.edge[edge[0]][edge[1]][ "link_betweeness"] = dict_betweenness_edges[edge] else: G_year.edge[edge[0]][edge[1]]["link_betweeness"] = 0. 
except KeyError: G_year.edge[edge[0]][edge[1]]["link_betweeness"] = "NA" print "edge", edge, "not found" for node in G_year.nodes(): try: G_year.node[node]['HHI_as_manuf'] except KeyError: G_year.node[node]['HHI_as_manuf'] = "NA" try: G_year.node[node]['HHI_as_contr'] except KeyError: G_year.node[node]['HHI_as_contr'] = "NA" ####### kshell structure print " kshell..." calculate_kshell(G_year, max_k) ####### max clique size print " max-clique..." for i in G_year.nodes(): if flag_network_metrics == "YES": maximo = 1 lista = nx.cliques_containing_node( G_year, i ) #list of lists, ej: [[207925, 203592], [207925, 10500761], [207925, 200554], [207925, 202587]] # print i, lista for elem in lista: if len(elem) > maximo: maximo = len(elem) G_year.node[i]['max_clique_size'] = maximo else: G_year.node[i]['max_clique_size'] = 0 ############## i write the corresponding rows in the master file (one line per link and per year) ######################### file_master = open(master_name, 'at') for edge in G_year.edges(): try: dict_tuple_link_Pinf_previous_year[edge] except KeyError: dict_tuple_link_Pinf_previous_year[edge] = 0. if year == initial_year: dict_tuple_link_Pinf_previous_year[edge] = "NA" manufacturer = edge[0] contractor = edge[1] P_inf = 0. # 1: if there has been at least one neg. transaction between manuf. and contr. during the year, 0 otherwise if G_year[manufacturer][contractor]['neg_weight'] != 0: P_inf = 1. try: dict_tuple_link_cumulat_previous_Pinf[edge] except KeyError: dict_tuple_link_cumulat_previous_Pinf[edge] = 0. if year == initial_year: dict_tuple_link_cumulat_previous_Pinf[edge] = "NA" dict_tuple_link_fract_previous_inf_years[edge] = 0. if y > initial_year: dict_tuple_link_fract_previous_inf_years[ edge] = dict_tuple_link_cumulat_previous_Pinf[edge] / ( y - 1985.) 
else: dict_tuple_link_fract_previous_inf_years[edge] = "NA" lista = [] # avg degree of the manufacturer's neighbours for n in G.neighbors(manufacturer): lista.append(float(G.degree(n))) K_nn_i = 0. try: K_nn_i = numpy.mean(lista) except: pass lista = [] # avg degree of the manufacturer's neighbours for n in G.neighbors(contractor): lista.append(float(G.degree(n))) K_nn_j = 0. try: K_nn_j = numpy.mean(lista) except: pass amount_pos_self_trans_i = 0. amount_pos_self_trans_j = 0. try: amount_pos_self_trans_i = dict_firm_amount_pos_self_trans_year[ manufacturer] except KeyError: pass try: amount_pos_self_trans_j = dict_firm_amount_pos_self_trans_year[ contractor] except KeyError: pass amount_neg_self_trans_i = 0. amount_neg_self_trans_j = 0. try: amount_neg_self_trans_i = dict_firm_amount_neg_self_trans_year[ manufacturer] except KeyError: pass try: amount_neg_self_trans_j = dict_firm_amount_neg_self_trans_year[ contractor] except KeyError: pass num_pos_self_trans_i = 0. num_pos_self_trans_j = 0. try: num_pos_self_trans_i = dict_firm_num_pos_self_trans_year[ manufacturer] except KeyError: pass try: num_pos_self_trans_j = dict_firm_num_pos_self_trans_year[ contractor] except KeyError: pass num_neg_self_trans_i = 0. num_neg_self_trans_j = 0. try: num_neg_self_trans_i = dict_firm_num_neg_self_trans_year[ manufacturer] except KeyError: pass try: num_neg_self_trans_j = dict_firm_num_neg_self_trans_year[ contractor] except KeyError: pass try: fract_pos_bussiness_of_M_with_C = G_year[manufacturer][ contractor]['pos_weight'] / G_year.node[manufacturer][ 'vol_pos_transct'] except ZeroDivisionError: fract_pos_bussiness_of_M_with_C = "NA" # (otherwise i cant define business asymmetry) try: fract_pos_bussiness_of_C_with_M = G_year[manufacturer][ contractor]['pos_weight'] / G_year.node[contractor][ 'vol_pos_transct'] except ZeroDivisionError: fract_pos_bussiness_of_C_with_M = "NA" #(otherwise i cant define business asymmetry) try: error_size_ij = -1. 
* G_year[manufacturer][contractor][ 'neg_weight'] / G_year[manufacturer][contractor][ 'pos_weight'] except ZeroDivisionError: error_size_ij = "NA" if G_year[manufacturer][contractor]['neg_weight'] != 0.: error_size_ij = -1. degree_asymmetry_ij = float( (G_year.degree(manufacturer) - G_year.degree(contractor)) * (G_year.degree(manufacturer) - G_year.degree(contractor)) ) / float( (G_year.degree(manufacturer) + G_year.degree(contractor)) * (G_year.degree(manufacturer) + G_year.degree(contractor))) business_asymmetry_ij = "NA" try: business_asymmetry_ij = ( (fract_pos_bussiness_of_M_with_C - fract_pos_bussiness_of_C_with_M) * (fract_pos_bussiness_of_M_with_C - fract_pos_bussiness_of_C_with_M)) / ( (fract_pos_bussiness_of_M_with_C + fract_pos_bussiness_of_C_with_M) * (fract_pos_bussiness_of_M_with_C + fract_pos_bussiness_of_C_with_M)) except: pass # either for a zerodivision error, or because one of the elements is a NA # print >> file_master, "Mi Cj Mi_Cj tot_adj_gross_ij pos_adj_gross_ij neg_adj_gross_ij pos_adj_gross_i neg_adj_gross_i pos_adj_gross_j neg_adj_gross_j P_inf P_inf_previous_year Cumulat_num_inf_years frac_previous_inf_years year ki kj K_nn_i K_nn_j HHIi_as_manuf HHIi_as_contr HHIj_as_manuf HHIj_as_contr betweenness_i betweenness_j link_betweenness_ij max_clique_i max_clique_j kshell_i kshell_j num_manuf_i num_contr_i num_manuf_j num_contr_j num_transact_ij num_transact_i num_transact_j amount_pos_self_trans_i amount_pos_self_trans_j amount_neg_self_trans_i amount_neg_self_trans_j num_pos_self_trans_i num_pos_self_trans_j num_neg_self_trans_i num_neg_self_trans_j fract_pos_bussiness_of_M_with_C fract_pos_bussiness_of_C_with_M degree_asymmetry_ij business_asymmetry_ij size_tot_errors_ij" print >> file_master, manufacturer, contractor, str( manufacturer ) + str( contractor ), G_year[manufacturer][contractor]['pos_weight'] + G_year[manufacturer][contractor][ 'neg_weight'], G_year[manufacturer][contractor]['pos_weight'], G_year[manufacturer][ 
contractor]['neg_weight'], G_year.node[manufacturer]['vol_pos_transct'], G_year.node[ manufacturer]['vol_neg_transct'], G_year.node[contractor][ 'vol_pos_transct'], G_year.node[contractor]['vol_neg_transct'], P_inf, dict_tuple_link_Pinf_previous_year[ edge], dict_tuple_link_cumulat_previous_Pinf[edge], dict_tuple_link_fract_previous_inf_years[ edge], y, G_year.degree( manufacturer ), G_year.degree( contractor ), K_nn_i, K_nn_j, G_year.node[manufacturer][ 'HHI_as_manuf'], G_year.node[manufacturer][ 'HHI_as_contr'], G_year.node[contractor]['HHI_as_manuf'], G_year.node[contractor][ 'HHI_as_contr'], G_year.node[manufacturer]['betweeness'], G_year.node[contractor]['betweeness'], G_year[ manufacturer][contractor]['link_betweeness'], G_year.node[ manufacturer]['max_clique_size'], G_year.node[contractor][ 'max_clique_size'], G_year.node[manufacturer][ 'kshell'], G_year.node[contractor]['kshell'], G_year.node[ manufacturer]['num_manuf'], G_year.node[manufacturer][ 'num_contractors'], G_year.node[contractor][ 'num_manuf'], G_year.node[contractor]['num_contractors'], G_year[ manufacturer][contractor]['num_pos_trans'] + G_year[ manufacturer][contractor][ 'num_neg_trans'], G_year.node[manufacturer][ 'num_transact'], G_year.node[contractor][ 'num_transact'], amount_pos_self_trans_i, amount_pos_self_trans_j, amount_neg_self_trans_i, amount_neg_self_trans_j, num_pos_self_trans_i, num_pos_self_trans_j, num_neg_self_trans_i, num_neg_self_trans_j, fract_pos_bussiness_of_M_with_C, fract_pos_bussiness_of_C_with_M, degree_asymmetry_ij, business_asymmetry_ij, error_size_ij ### for next year try: dict_tuple_link_Pinf_previous_year[edge] = P_inf except TypeError: dict_tuple_link_Pinf_previous_year[edge] = 0. try: dict_tuple_link_cumulat_previous_Pinf[edge] += P_inf except TypeError: dict_tuple_link_cumulat_previous_Pinf[edge] = 0. 
file_master.close() ######## write the yearly network filename_network = "../Results/Supply_network_year_" + str(y) pickle.dump(G_year, open(filename_network + ".pickle", 'wb')) print " written", filename_network + ".pickle" nx.write_gml(G_year, filename_network + ".gml") print " written", filename_network + ".gml" print " N:", len(G_year.nodes()), " L:", len(G_year.edges()) G_no_loops = remove_self_loops(G_year) print " without self-loops:", len(G_no_loops.nodes()), " L:", len( G_no_loops.edges()) print "# nodes (aggregated so far):", len( G.nodes()), " # links (id):", len(G.edges()) y += 1 ################ new year file ################################################## ################################################## ################################################## print "written:", master_name print "\n\nAggregated network:" print "tot. # firms:", len(set(list_firm_ids)) print "tot. # transactions:", int(cont_transactions) print "tot. # negative transactions:", int( cont_neg_transactions ), " ", cont_neg_transactions / cont_transactions * 100., "%" print "tot. # self-transactions:", int( cont_self_transactions ), " ", cont_self_transactions / cont_transactions * 100., "%" print "tot. 
# self-neg-transactions:", int( cont_self_neg_transactions ), " ", cont_self_neg_transactions / cont_transactions * 100., "%" print " # unique tuples:", len(set(list_tuplas)) for firm in G.nodes(): G.node[firm]['vol_transct'] = dict_firm_total_trans_volum[firm] G.node[firm]['fract_neg_transct'] = dict_firm_num_neg_trans[firm] / ( dict_firm_num_neg_trans[firm] + dict_firm_num_pos_trans[firm]) G.node[firm]['num_transact'] = dict_firm_num_neg_trans[ firm] + dict_firm_num_pos_trans[firm] G.node[firm]['num_manuf'] = 0 G.node[firm]['num_contractors'] = 0 for link in dict_link_num_pos_trans: G[link[0]][link[1]]['num_pos_trans'] = dict_link_num_pos_trans[link] G[link[0]][link[1]]['num_neg_trans'] = dict_link_num_neg_trans[link] G[link[0]][ link[1]]['fract_neg_trans'] = dict_link_num_neg_trans[link] / ( dict_link_num_neg_trans[link] + dict_link_num_pos_trans[link]) if G[link[0]][link[1]]['num_neg_trans'] == 0.: G[link[0]][link[1]]['neg_weight'] = 0. if G[link[0]][link[1]]['num_pos_trans'] == 0.: G[link[0]][link[1]]['pos_weight'] = 0. # print sorted(list_periods) name_h = "../Results/histogram_period_lengths.dat" histograma_gral.histogram(list_periods, name_h) ################ i add topological attributes to the nodes ########################### print "calculating network metrics:" print " CC..." if flag_network_metrics == "YES": dict_clustering = nx.clustering(G) print " node betweenness..." if flag_network_metrics == "YES": dict_betweenness_nodes = nx.betweenness_centrality(G) list_k = [] for node in G.nodes(): k = G.degree(node) G.node[node]["degree"] = k list_k.append(k) if flag_network_metrics == "YES": G.node[node]["CC"] = dict_clustering[node] G.node[node]["betweeness"] = dict_betweenness_nodes[node] else: G.node[node]["CC"] = 0. G.node[node]["betweeness"] = 0. max_k = max(list_k) print " edge betweenness..." 
if flag_network_metrics == "YES": dict_betweenness_edges = nx.edge_betweenness_centrality( G, normalized=True, weight=None ) # it returns dictionary of edges (tuplas as keys) with betweenness centrality as the value. ### i can also calculate the edges' betweenness taking into account their weight!! for edge in G.edges(): # print edge try: if flag_network_metrics == "YES": G.edge[edge[0]][ edge[1]]["link_betweeness"] = dict_betweenness_edges[edge] else: G.edge[edge[0]][edge[1]]["link_betweeness"] = 0 except KeyError: G.edge[edge[0]][edge[1]]["link_betweeness"] = "NA" print "edge", edge, "not found" ####### k-shell decomposition (i need to make a copy and remove the self-loops from that before i can proceed) print " kshell..." if flag_network_metrics == "YES": calculate_kshell(G, max_k) ####### max clique size print " max-clique..." for node in G.nodes(): if flag_network_metrics == "YES": maximo = 1 lista = nx.cliques_containing_node( G, node ) #list of lists, ej: [[207925, 203592], [207925, 10500761], [207925, 200554], [207925, 202587]] # print i, lista for elem in lista: if len(elem) > maximo: maximo = len(elem) G.node[i]['max_clique_size'] = maximo else: G.node[i]['max_clique_size'] = 0 ####### HHI index as manufacturer and as contractor for manufact in dict_manuf_dict_contr_amounts: lista = list(dict_manuf_dict_contr_amounts[manufact].values()) HHI = Herfindahl_index.calculate_HHI(lista) G.node[manufact]['HHI_as_manuf'] = HHI[0] G.node[manufact]['num_contractors'] = len(lista) for contr in dict_contr_dict_manuf_amounts: lista = list(dict_contr_dict_manuf_amounts[contr].values()) HHI = Herfindahl_index.calculate_HHI(lista) G.node[contr]['HHI_as_contr'] = HHI[0] G.node[contr]['num_manuf'] = len(lista) ######## write the aggregated network filename_network = "../Results/Supply_network_" + str( initial_year) + "_" + str(final_year) pickle.dump(G, open(filename_network + ".pickle", 'wb')) print "written", filename_network + ".pickle" nx.write_gml(G, filename_network + 
".gml") print "written", filename_network + ".gml" print "N:", len(G.nodes()), " L:", len(G.edges()) G_no_loops = remove_self_loops(G) print " without self-loops:", len(G_no_loops.nodes()), " L:", len( G_no_loops.edges()) print if flag_hist == "YES": name_h = "../Results/histogram_pos_adj_gross_values_years_" + str( initial_year) + "_" + str(final_year) + ".dat" histograma_gral.histogram(list_pos_adj_gross, name_h) # print "# obsrv:",len(list_pos_adj_gross), " max.", max(list_pos_adj_gross), " min.", min(list_pos_adj_gross), " avg:", numpy.mean(list_pos_adj_gross), " sd:", numpy.std(list_pos_adj_gross) print if flag_hist == "YES": name_h = "../Results/histogram_neg_adj_gross_values_years_" + str( initial_year) + "_" + str(final_year) + ".dat" histograma_gral.histogram(list_neg_adj_gross, name_h) # print "# obsrv:",len(list_neg_adj_gross), " max.", -1.*max(list_neg_adj_gross), " min.", -1.*min(list_neg_adj_gross), " avg:", -1.*numpy.mean(list_neg_adj_gross), " sd:", numpy.std(list_neg_adj_gross) print "written:", name1 print "written:", name2 print "written:", name3 print "written:", name4
def FindCliquesAndButterflies(): #This will compute a list of cliques. #We are using NetworkX's Implementation of Cliques #Reciprocal = true make sure that only nodes that have both edges will be included for searching for cliques listOfCliques = list( nx.find_cliques(DirectedEmailGraph.to_undirected(reciprocal=True))) #This will find the maximum clique size. #Reciprocal = true make sure that only nodes that have both edges will be included for searching for cliques #Cliques = listOfCliques provides the list of cliques already computed maxCliqueSize = nx.graph_clique_number( DirectedEmailGraph.to_undirected(reciprocal=True), cliques=listOfCliques) #This list will hold the all of the cliques with the max size that matches maxCliqueSize listOfMaximumCliques = [] #This will compute listOfMaximumCliques for i in range(len(listOfCliques)): if len(listOfCliques[i]) == maxCliqueSize: listOfMaximumCliques.append(listOfCliques[i]) #This will contain the nodes from the listOfMaximumCliques, a set is chosen because of no duplicate elements setOfNodes = set() #This will compute the setOfNodes for i in range(len(listOfMaximumCliques)): for j in range(maxCliqueSize): setOfNodes.add(listOfMaximumCliques[i][j]) #This will hold a temporary list of butterflies. The reason behind this, is we have not verrified that these are only connected by one node and one node only. 
tempListOfButterflies = [] maxSizeOfTempListOfButterflies = 0 #Computes tempListOfButterflies for i in setOfNodes: tempList = nx.cliques_containing_node( DirectedEmailGraph.to_undirected(reciprocal=True), nodes=i, cliques=listOfMaximumCliques) if (len(tempList) >= 2): tempListOfButterflies.append(tempList) #This is going to be used to calculate butterflies if maxSizeOfTempListOfButterflies < len(tempList): maxSizeOfTempListOfButterflies = len(tempList) #This will hold a list of list of lists that contain {{{LeftWing}{RightWing}},{{LeftWing}{RightWing}}} and so on RealListOfButterflies = [] #This Computes RealListOfButterflies for i in range(len(tempListOfButterflies)): #This will tell us if the list if larger than size 1, in the case that it is larger than size 1 #Then we know that there are either 2 cliques with 2 nodes in common #or more than 2 cliques one or more with the possibility of more than one node in common. if len(tempListOfButterflies[i]) > 2: for j in range(len(tempListOfButterflies[i])): for k in range(len(tempListOfButterflies[i])): if k > j: tempSet = set.intersection( *map(set, (tempListOfButterflies[i][j], tempListOfButterflies[i][k]))) if len(tempSet) == 1: RealListOfButterflies.append( (tempListOfButterflies[i][j], tempListOfButterflies[i][k])) #If there are only two cliques and only one node in common then we will add them to the list of butterflies if (len(tempListOfButterflies[i]) == 2): tempSet = set.intersection(*map(set, tempListOfButterflies[i])) if len(tempSet) == 1: RealListOfButterflies.append(tempListOfButterflies[i]) #Here we will start listing cliques of largest size. print "##########################################################" print "The largest size clique in the graph was: ", maxCliqueSize print "##########################################################" print "List of Butterflies: \n" for i in range(len(RealListOfButterflies)): print RealListOfButterflies[i] return
"follower_user_data"] #followers of followers screen_names followers = graph_data["followers"] #my followers followers_screen_names = graph_data[ "followers_screen_names"] #screen_names of my followers follower_data = graph_data["follower_data"] #followers of followers for follower in followers_screen_names: twitter_graph.add_edge(main_user, follower["screen_name"].lower()) for each, value in follower_user_data.items(): for name in value: twitter_graph.add_edge(follower["screen_name"].lower(), name["screen_name"].lower()) # #set positions pos = nx.random_layout(twitter_graph) plt.figure(figsize=(16, 10)) nx.draw_networkx_nodes(twitter_graph, pos, node_size=30) nx.draw_networkx_edges(twitter_graph, pos, alpha=0.01) #get the nodes that we want to draw labels for clique = nx.cliques_containing_node(twitter_graph, nodes=[main_user]) clique = clique[main_user][0] clique.append(main_user) labels = {} for name in clique: labels[name] = name nx.draw_networkx_labels(twitter_graph, pos, font_size=16, labels=labels) plt.show()
def main(graph_name):
    """Compute weight-change statistics on a GML network and write them out.

    Steps, in order:
      * read the GML graph, drop self-loops, keep the first connected
        component returned by networkx as ``G``;
      * collect the nodes whose ``role`` attribute is "R6" and annotate every
        node with ``R6_overlap`` (number of R6 neighbours) and
        ``max_clique_size``;
      * write several ``.dat`` tables (per clique size, per number of R6
        neighbours, per R6 ego-network, per role, per degree), run linear
        fits plus the module's ``bootstrap`` helper on three of them, and
        append Mood / KS test results;
      * write the annotated graph to
        ``<graph_name minus .gml>_adherent_num_R6s_largest_clique.gml``.

    Relies on ``histograma`` and ``bootstrap`` being defined elsewhere in
    this module, and on ``nx``, ``numpy`` and ``stats`` being imported.

    NOTE(review): the source this was reconstructed from had lost its
    indentation; nesting of a few statements (flagged below) should be
    confirmed against the original file.

    Parameters
    ----------
    graph_name : str
        Path to the input ``.gml`` file. It must contain the substrings
        "friends" and "5_points_network_2010/data/", which are used to build
        the output directory and base file name.
    """
    H = nx.read_gml(graph_name)
    for node in H.nodes():  # remove self loops
        if node in H.neighbors(node):
            if len(H.neighbors(node)) > 1:
                H.remove_edge(node, node)
            else:
                # the self-loop is the node's only edge: drop the whole node
                H.remove_node(node)
    # for node in H.nodes():
    #     if H.node[node]['weigh_ins'] <5:  # Adherent filter
    #         H.remove_node(node)
    #         print node, "is going down"
    # presumably the giant component (first subgraph returned) — TODO confirm
    # the ordering guarantee of the networkx version in use
    G = nx.connected_component_subgraphs(H)[0]  # Giant component
    print "size of the GC:", len(G.nodes())  # , "after filtering for adherence!!"
    #dir=graph_name.split("full_")[0]
    #dir=graph_name.split("master")[0]
    #dir=graph_name.split("method3_")[0]
    #dir=graph_name.split("method3_adh")[0]
    # output directory: everything before "friends" in the path, plus "roles/"
    dir = graph_name.split("friends")[0]
    dir = dir + "roles/"
    time_in_system = 50  # minimum amount of time in the system for a user to be included in the statistics
    #name=graph_name.split('data/')[1]
    #name=graph_name.split('method3_50/interim/')[1]
    #name=graph_name.split('network_all_users/')[1]
    name = graph_name.split('5_points_network_2010/data/')[1]
    name = name.split('.gml')[0]
    name0 = dir + name + "_overlap_R6s_averages_" + str(time_in_system) + "days_exclude_R6s.dat"
    # open in 'wt' and close immediately: empties the file so later 'at'
    # appends start from scratch
    file0 = open(name0, 'wt')
    file0.close()
    contador = 0  # running index of the fits appended to name12
    name12 = dir + name + "_slopes_for_the_fits_average_weight_change.dat"
    file = open(name12, 'wt')
    file.close()
    #### for the Isolated Clusters:
    list_GC_nodes = []
    for n in G.nodes():
        list_GC_nodes.append(n)
        # print G.node[n]['percentage_weight_change']
    # print "# users GC:",len(list_GC_nodes),"total:",len(H.nodes())
    # weight changes of the nodes of H that are not in G
    list_weight_changes_not_GC = []
    for n in H.nodes():
        if n not in list_GC_nodes:
            #print n,"not in GC"
            list_weight_changes_not_GC.append(float(H.node[n]['percentage_weight_change']))
    #print "# users not in GC:",len(list_weight_changes_not_GC)
    # who="not_GC"
    #Nbins=18
    #histograma(list_weight_changes_not_GC,Nbins,dir,name,who)
    ###########################
    list_R6s = []  # collect the R6 of the system
    list_R6s_label = []
    list_R6s_percent_weight_change = []
    for node in G.nodes():
        if str(G.node[node]['role']) == "R6":
            list_R6s.append(node)
            list_R6s_label.append(G.node[node]['label'])
            list_R6s_percent_weight_change.append(float(G.node[node]['percentage_weight_change']))
    name00 = dir + name + "R6s_and_top_tens_averages_" + str(time_in_system) + "days_exclude_R6s.dat"
    # NOTE: opened in append mode, so repeated runs accumulate lines
    file0 = open(name00, 'at')
    print >> file0, "R6s", numpy.mean(list_R6s_percent_weight_change), numpy.std(list_R6s_percent_weight_change)
    file0.close()
    # print "\n\n R6s:\n"
    # for i in list_R6s_label:
    #     print i
    # studying the possible cumulative effect of more than one R6 on the population:
    for node in G.nodes():
        cont = 0
        for n in G.neighbors(node):
            if str(G.node[n]['role']) == "R6":
                cont += 1
        G.node[node]["R6_overlap"] = int(cont)
    ##### weight change for people not connected to any R6s: ####
    list_weight_changes_no_neighbors = []
    for node in G.nodes():
        interseccion = list(set(G.neighbors(node)) & set(list_R6s))
        # print node, "intersection:",intersection,len(intersection)
        # print "because", list_R6s, "and ",G.neighbors(node)
        # raw_input()
        if len(interseccion) == 0:
            list_weight_changes_no_neighbors.append(G.node[node]['percentage_weight_change'])
    # print len(list_weight_changes_no_neighbors),"no_neighbors"
    who = "no_neigbors_R6s"
    Nbins = 18
    histograma(list_weight_changes_no_neighbors, Nbins, dir, name, who)
    # mood test
    mood = stats.mood(list_weight_changes_no_neighbors, list_weight_changes_not_GC)
    print "mood test for", who, "against not_GC:", mood
    ########
    # K-S test:
    ks = stats.ks_2samp(list_weight_changes_no_neighbors, list_weight_changes_not_GC)
    print "KS test for", who, "against not_GC:", ks
    name00 = "ks_results.dat"
    file0 = open(dir + name00, 'at')
    print >> file0, "KS test for", who, "of", graph_name, "against not_GC:", ks
    file0.close()
    #############################################
    # average percentage weight change as a function of the size of the largest CLIQUE the node belongs to:
    absolute_max = 1
    for i in G.nodes():
        maximo = 1
        list2 = nx.cliques_containing_node(G, i)
        # print i, list2
        for elem in list2:
            # print elem,len(elem,)
            if len(elem) > maximo:
                maximo = len(elem)
        # print "\n",maximo
        G.node[i]['max_clique_size'] = maximo
        if absolute_max < maximo:
            absolute_max = maximo
    #print absolute_max
    lista = list(nx.find_cliques(G))  # creates a list of cliques (list of lists)
    max_clique = nx.graph_clique_number(G)  # finds out max size clique
    num_tot_clique = nx.graph_number_of_cliques(G)  # finds out total number of cliques
    # count number of 2, 3, 4, 5, 6, 7, 8 and 9 cliques
    # (these counters are only referenced by the commented-out print below):
    num_2cliques = 0
    num_3cliques = 0
    num_4cliques = 0
    num_5cliques = 0
    num_6cliques = 0
    num_7cliques = 0
    num_8cliques = 0
    num_9cliques = 0
    for element in lista:
        if len(element) == 2:
            num_2cliques = num_2cliques + 1
        elif len(element) == 3:
            num_3cliques = num_3cliques + 1
        elif len(element) == 4:
            num_4cliques = num_4cliques + 1
        elif len(element) == 5:
            num_5cliques = num_5cliques + 1
        elif len(element) == 6:
            num_6cliques = num_6cliques + 1
        elif len(element) == 7:
            num_7cliques = num_7cliques + 1
        elif len(element) == 8:
            num_8cliques = num_8cliques + 1
        elif len(element) == 9:
            num_9cliques = num_9cliques + 1
    # print " 2: ",num_2cliques, " 3: ",num_3cliques, " 4: ",num_4cliques, " 5: ",num_5cliques, " 6: ",num_6cliques, " 7: ",num_7cliques, " 8: ",num_8cliques, " 9: ",num_9cliques, " max_clique_size:",max_clique, " num_tot_cliques:", num_tot_clique
    name33 = dir + name + "_percent_weight_change_vs_largest_clique_size.dat"
    file11 = open(name33, 'wt')
    file11.close()
    list_of_lists_for_bootstrap = []
    x_positions_fit = []
    y_positions_fit = []
    cum_size_set = float(len(G.nodes()))
    tot_nodes = []
    for clique_size in range(1, max_clique):
        clique_size = clique_size + 1  # so the sizes visited are 2..max_clique
        print clique_size
        # number of nodes not yet assigned to a smaller clique size
        num_users_set = cum_size_set
        percent_weight_change_that_clique_size = []
        for n in G.nodes():
            if G.node[n]['max_clique_size'] == clique_size:
                percent_weight_change_that_clique_size.append(float(G.node[n]['percentage_weight_change']))
                tot_nodes.append(float(G.node[n]['percentage_weight_change']))
                cum_size_set -= 1.0
        file11 = open(name33, 'at')
        # NOTE(review): no emptiness check here, unlike the degree section
        # below — numpy.mean/std of an empty list yields nan
        print >> file11, clique_size, len(percent_weight_change_that_clique_size), num_users_set / float(len(G.nodes())), numpy.mean(percent_weight_change_that_clique_size), numpy.std(percent_weight_change_that_clique_size)
        file11.close()
        # only the first eight points are used for the fit
        if len(x_positions_fit) <= 7:
            x_positions_fit.append(clique_size)
            y_positions_fit.append(numpy.mean(percent_weight_change_that_clique_size))
            list_of_lists_for_bootstrap.append(percent_weight_change_that_clique_size)
    slope, intercept, Corr_coef, p_value, std_err = stats.linregress(x_positions_fit, y_positions_fit)  # least squares linear fit
    print "result linear. fit for clique size dependency:"
    print "slope:", slope, "intercept:", intercept, "Corr_coef:", Corr_coef, "p_value:", p_value, "std_err:", std_err
    name11 = dir + name + "_fits_clique_size.dat"
    file11 = open(name11, 'wt')
    for i in range(len(x_positions_fit)):
        print >> file11, x_positions_fit[i], intercept + x_positions_fit[i] * slope
    print >> file11, "\n\n", "y=", intercept, "+", slope, "*x",
    print "Bootstrap for clique size:\n"
    # bootstrap() is defined elsewhere; it is used here as returning
    # (mean slope, standard deviation)
    mean_slope, standard_dev = bootstrap(x_positions_fit[0], x_positions_fit[-1], list_of_lists_for_bootstrap)
    # NOTE(review): raises ZeroDivisionError / yields inf if standard_dev is 0
    zscore = (slope - mean_slope) / standard_dev
    print >> file11, "bootstrap:\n", "actual slope:", slope, "mean_slope:", mean_slope, "standard_dev:", standard_dev, "\n zscore:", zscore
    print x_positions_fit[0], x_positions_fit[-1], "actual slope:", slope, "mean_slope:", mean_slope, "standard_dev:", standard_dev, "\n zscore:", zscore
    file11.close()
    contador += 1
    file = open(name12, 'at')
    print >> file, contador, mean_slope, standard_dev, "largest_clique_size"
    file.close()
    #######################################
    ##### dose effect of the R6s independently ########
    name11 = dir + name + "_dose_eff_indepently_only_one_R6_" + str(time_in_system) + "days_exclude_R6s.dat"
    file11 = open(name11, 'at')
    print >> file11, 0, "average_no_neighbors", "average_no_neighbors", "average_no_neighbors", len(list_weight_changes_no_neighbors), numpy.mean(list_weight_changes_no_neighbors), numpy.std(list_weight_changes_no_neighbors)  # the first line of the file is actually for no_neighbors, the rest, for one_and_only_one
    file11.close()
    # NOTE(review): reopening with 'wt' right after the append truncates the
    # file, discarding the "average_no_neighbors" line just written — confirm
    # whether these two steps were meant to be in the opposite order
    file11 = open(name11, 'wt')
    file11.close()
    cont = 1
    list_all = []
    list_all_nodes = []
    for R6 in list_R6s:
        list_weight_changes = []
        for n in G.neighbors(R6):
            # neighbours that are not R6 themselves and touch exactly one R6
            if (G.node[n]['role'] != "R6") and (G.node[n]["R6_overlap"] == 1):
                list_weight_changes.append(float(G.node[n]['percentage_weight_change']))
                if n not in list_all_nodes:
                    list_all_nodes.append(n)
                    list_all.append(float(G.node[n]['percentage_weight_change']))
        if len(list_weight_changes) > 0:
            file11 = open(name11, 'at')
            print >> file11, cont, G.node[R6]['role'], G.node[R6]['label'], len(G.neighbors(R6)), len(list_weight_changes), numpy.mean(list_weight_changes), numpy.std(list_weight_changes)
            file11.close()
            # print cont,G.node[R6]['role'],G.node[R6]['label'], len(G.neighbors(R6)),len(list_weight_changes),numpy.mean(list_weight_changes),numpy.std(list_weight_changes)
            cont = cont + 1
        else:
            # file11=open(name11, 'at')
            #print >> file11,cont,G.node[R6]['role'],G.node[R6]['label'],len(G.neighbors(R6)),len(list_weight_changes)
            #file11.close()
            # print cont,G.node[R6]['role'],G.node[R6]['label'],len(G.neighbors(R6)),len(list_weight_changes)
            cont = cont + 1
    who = "one_and_only_one_R6s"
    Nbins = 18
    histograma(list_all, Nbins, dir, name, who)
    ####################################
    print "\n\n"
    list_of_lists_for_bootstrap = []
    x_positions_fit = []
    y_positions_fit = []
    averages_larger5_x = []  # only referenced by commented-out code below
    averages_larger5_y = []
    norm = 0.0
    cum_size_set = float(len(G.nodes())) - float(len(list_R6s))
    # r = number of R6 neighbours ("dose")
    for r in range(len(list_R6s) + 1):
        # list_BMI_changes=[]
        list_weight_changes = []
        list_percentage_weight_changes = []
        list_activities = []
        num_users_set = cum_size_set
        for node in G.nodes():
            if int(G.node[node]["R6_overlap"]) == r:
                if G.node[node]["role"] == "R6":  # i exclude the R6s
                    pass
                else:
                    if int(G.node[node]['time_in_system']) > time_in_system:
                        # list_BMI_changes.append(float(G.node[node]['final_BMI'])-float(G.node[node]['initial_BMI']))
                        list_weight_changes.append(float(G.node[node]['weight_change']))
                        list_percentage_weight_changes.append(float(G.node[node]['percentage_weight_change']))
                        list_activities.append(float(G.node[node]['activity']) / float(G.node[node]['time_in_system']))
                    # NOTE(review): nesting level of this decrement could not
                    # be recovered from the source; placed at the non-R6 level
                    # to match the initial value (all nodes minus R6s) — confirm
                    cum_size_set -= 1.0
        if len(list_percentage_weight_changes) > 0:
            # average_BMI_change=numpy.mean(list_BMI_changes)
            average_weight_change = numpy.mean(list_weight_changes)
            average_percentage_weight_change = numpy.mean(list_percentage_weight_changes)
            average_activity = numpy.mean(list_activities)
            #deviation_BMI=numpy.std(list_BMI_changes)
            deviation_weight = numpy.std(list_weight_changes)
            deviation_percentage_weight = numpy.std(list_percentage_weight_changes)
            deviation_activity = numpy.std(list_activities)
            #print out
            file0 = open(name0, 'at')
            print >> file0, r, len(list_percentage_weight_changes), num_users_set / float(len(G.nodes())), average_percentage_weight_change, deviation_percentage_weight, average_weight_change, deviation_weight, average_activity, deviation_activity
            file0.close()
            # only doses 0..5 enter the fit
            if r <= 5:
                x_positions_fit.append(r)
                y_positions_fit.append(average_percentage_weight_change)
                list_of_lists_for_bootstrap.append(list_percentage_weight_changes)
            # else:
            #     aux_x=r*len(list_percentage_weight_changes)
            #     averages_larger5_x.append(aux_x)
            #     aux_y=average_percentage_weight_change*len(list_percentage_weight_changes)
            #     averages_larger5_y.append(aux_y)
            #     norm+=float(len(list_percentage_weight_changes))
    # x_positions_fit.append(numpy.mean(averages_larger5_x)/norm)
    # y_positions_fit.append(numpy.mean(averages_larger5_y)/norm)
    slope, intercept, Corr_coef, p_value, std_err = stats.linregress(x_positions_fit, y_positions_fit)  # least squares linear fit
    print "result linear. fit for dose eff.:"
    print "slope:", slope, "intercept:", intercept, "Corr_coef:", Corr_coef, "p_value:", p_value, "std_err:", std_err
    name11 = dir + name + "_fits_dose_eff_R6.dat"
    file11 = open(name11, 'wt')
    for i in range(len(x_positions_fit)):
        print >> file11, x_positions_fit[i], intercept + x_positions_fit[i] * slope
    print >> file11, "\n\n", "y=", intercept, "+", slope, "*x",
    print "Bootstrap for dose eff. R6s:\n"
    mean_slope, standard_dev = bootstrap(x_positions_fit[0], x_positions_fit[-1], list_of_lists_for_bootstrap)
    zscore = (slope - mean_slope) / standard_dev
    print >> file11, "bootstrap:\n", "actual slope:", slope, "mean_slope:", mean_slope, "standard_dev:", standard_dev, "\n zscore:", zscore
    print x_positions_fit[0], x_positions_fit[-1], "actual slope:", slope, "mean_slope:", mean_slope, "standard_dev:", standard_dev, "\n zscore:", zscore
    file11.close()
    contador += 1
    file = open(name12, 'at')
    print >> file, contador, mean_slope, standard_dev, "dose_eff"
    file.close()
    #### averages for every R6's egonetwork: #########
    cont = 1
    list_all_ = []
    list_all_nodes_ = []
    for node in list_R6s:
        neighbors = G.neighbors(node)  # a list of nodes
        average_BMI_change = 0.0
        list_BMI_changes = []
        average_weight_change = 0.0
        list_weight_changes = []
        average_percentage_weight_change = 0.0
        list_percentage_weight_changes = []
        average_activity = 0.0  # careful! it will be divided by the number of days
        list_activities = []
        for n in G.neighbors(node):
            if int(G.node[n]['time_in_system']) > time_in_system:
                # list_BMI_changes.append(float(G.node[n]['final_BMI'])-float(G.node[n]['initial_BMI']))
                list_weight_changes.append(float(G.node[n]['weight_change']))
                list_percentage_weight_changes.append(float(G.node[n]['percentage_weight_change']))
                list_activities.append(float(G.node[n]['activity']) / float(G.node[n]['time_in_system']))
                if n not in list_all_nodes_:
                    list_all_nodes_.append(n)
                    list_all_.append(float(G.node[n]['percentage_weight_change']))
        #averages
        average_weight_change = numpy.mean(list_weight_changes)
        # average_BMI_change=numpy.mean(list_BMI_changes)
        average_activity = numpy.mean(list_activities)
        average_percentage_weight_change = numpy.mean(list_percentage_weight_changes)
        #standard deviation
        #deviation_BMI=numpy.std(list_BMI_changes)
        deviation_weight = numpy.std(list_weight_changes)
        deviation_percentage_weight = numpy.std(list_percentage_weight_changes)
        deviation_activity = numpy.std(list_activities)
        #print out (all three files are opened in append mode)
        name2 = dir + name + "_ego_R6s_average_weight_change_" + str(time_in_system) + "days.dat"
        file2 = open(name2, 'at')
        print >> file2, cont, G.node[node]['role'], G.node[node]['label'], len(G.neighbors(node)), average_weight_change, deviation_weight
        file2.close()
        name22 = dir + name + "_ego_R6s_average_percentage_weight_change_" + str(time_in_system) + "days.dat"
        file22 = open(name22, 'at')
        print >> file22, cont, G.node[node]['role'], G.node[node]['label'], len(G.neighbors(node)), average_percentage_weight_change, deviation_percentage_weight
        file22.close()
        name3 = dir + name + "_ego_R6s_average_activity_" + str(time_in_system) + "days.dat"
        file3 = open(name3, 'at')
        print >> file3, cont, G.node[node]['role'], G.node[node]['label'], len(G.neighbors(node)), average_activity, deviation_activity
        file3.close()
        cont = cont + 1
    who = "R6s_egonetworks_all"
    Nbins = 18
    histograma(list_all_, Nbins, dir, name, who)
    # print "intersection:",len(set(list_all_)&set(list_all)),len(list_all_),len(list_all)
    ############# just checking what happens if we remove the 40155 guy
    ##### percent weight change vs. role:
    list_roles = ["R1", "R2", "R3", "R4", "R5", "R6", "R7"]
    file = open(dir + name + "_percentage_weight_change_vs_role", 'wt')
    cont = 1
    for role in list_roles:
        list_weight_changes_role = []
        for n in G.nodes():
            if G.node[n]['role'] == role:
                list_weight_changes_role.append(G.node[n]['percentage_weight_change'])
        print >> file, cont, role, len(list_weight_changes_role), numpy.mean(list_weight_changes_role), numpy.std(list_weight_changes_role)
        cont += 1
    file.close()
    #############################
    ############## percentage weight change vs k
    x_positions_fit = []
    y_positions_fit = []
    cum_size_set = float(len(G.nodes()))
    list_of_lists_for_bootstrap = []
    list_k = []
    for n in G.nodes():
        list_k.append(len(G.neighbors(n)))
    max_k = max(list_k)
    file = open(dir + name + "_percentage_weight_change_vs_k.dat", 'wt')
    max_k = max_k + 1  # so that range() below includes the maximum degree
    for k in range(1, max_k):
        num_users_set = cum_size_set
        list_percent_weight_change_k = []
        for n in G.nodes():
            if len(G.neighbors(n)) == k:
                list_percent_weight_change_k.append(G.node[n]['percentage_weight_change'])
                cum_size_set -= 1.0
        if len(list_percent_weight_change_k) > 0:
            print >> file, k, len(list_percent_weight_change_k), num_users_set / float(len(G.nodes())), numpy.mean(list_percent_weight_change_k), numpy.std(list_percent_weight_change_k)
            # only the first eight non-empty degrees are used for the fit
            if len(x_positions_fit) <= 7:
                x_positions_fit.append(k)
                y_positions_fit.append(numpy.mean(list_percent_weight_change_k))
                list_of_lists_for_bootstrap.append(list_percent_weight_change_k)
    slope, intercept, Corr_coef, p_value, std_err = stats.linregress(x_positions_fit, y_positions_fit)  # least squares linear fit
    print "result linear. fit for degree dependency:"
    print "slope:", slope, "intercept:", intercept, "Corr_coef:", Corr_coef, "p_value:", p_value, "std_err:", std_err
    file.close()
    name11 = dir + name + "_fits_degree.dat"
    file11 = open(name11, 'wt')
    for i in range(len(x_positions_fit)):
        print >> file11, x_positions_fit[i], intercept + x_positions_fit[i] * slope
    print >> file11, "\n\n", "y=", intercept, "+", slope, "*x",
    print "Bootstrap for degree:\n"
    mean_slope, standard_dev = bootstrap(x_positions_fit[0], x_positions_fit[-1], list_of_lists_for_bootstrap)
    zscore = (slope - mean_slope) / standard_dev
    print >> file11, "bootstrap:\n", "actual slope:", slope, "mean_slope:", mean_slope, "standard_dev:", standard_dev, "\n zscore:", zscore
    print x_positions_fit[0], x_positions_fit[-1], "actual slope:", slope, "mean_slope:", mean_slope, "standard_dev:", standard_dev, "\n zscore:", zscore
    file11.close()
    contador += 1
    file = open(name12, 'at')
    print >> file, contador, mean_slope, standard_dev, "degree"
    file.close()
    ########################################
    # save the giant component with the R6_overlap / max_clique_size
    # attributes added above
    new_name = graph_name.split(".gml")[0]
    new_name = new_name + "_adherent_num_R6s_largest_clique.gml"
    nx.write_gml(G, new_name)
def triangulate(self, heuristic='H6', order=None, inplace=False):
    """
    Triangulate the graph.

    If order of deletion is given heuristic algorithm will not be used.

    Parameters
    ----------
    heuristic: H1 | H2 | H3 | H4 | H5 | H6
        The heuristic algorithm to use to decide the deletion order of
        the variables to compute the triangulated graph.
        Let X be the set of variables and X(i) denotes the i-th variable.

        S(i): The size of the clique created by deleting the variable.
        E(i): Cardinality of variable X(i).
        M(i): The maximum size of the cliques of the subgraph given by
              X(i) and its adjacent nodes.
        C(i): The sum of the size of cliques of the subgraph given by
              X(i) and its adjacent nodes.

        The size of a clique is the product of the cardinalities of its
        variables. The heuristic algorithm decides the deletion order in
        this way:

        H1: Delete the variable with minimal S(i).
        H2: Delete the variable with minimal S(i)/E(i).
        H3: Delete the variable with minimal S(i) - M(i).
        H4: Delete the variable with minimal S(i) - C(i).
        H5: Delete the variable with minimal S(i)/M(i).
        H6: Delete the variable with minimal S(i)/C(i).

        Any other value is treated like H6 (as before).

    order: list, tuple (array-like)
        The order of deletion of the variables to compute the triangulated
        graph. If order is given heuristic algorithm will not be used.

    inplace: True | False
        if inplace is true then adds the edges to the object from
        which it is called else returns a new object.

    Returns
    -------
    ``self`` with the fill-in edges added when ``inplace`` is true,
    otherwise a copy of ``self`` carrying the fill-in edges (``self`` is
    left untouched).

    Reference
    ---------
    http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.56.3607
    """
    def _size(variables):
        # Size of a group of variables: product of their cardinalities.
        return np.prod([self.cardinality[var] for var in variables])

    def _neighbors(graph, node):
        # Neighbours of `node` as a list, ignoring a possible self-loop.
        return [nbr for nbr in graph.neighbors(node) if nbr != node]

    if not order:
        order = []
        # Scratch graph on which variables are actually deleted, so that
        # every round scores only the variables that are still present.
        working = nx.Graph(self.edges())
        while working.number_of_nodes() > 0:
            S, M, C = {}, {}, {}
            for node in list(working.nodes()):
                neighbors = _neighbors(working, node)
                # Deleting `node` connects its neighbours pairwise; that
                # group is the clique "created by deleting the variable".
                # NOTE(review): S excludes X(i) itself, matching the
                # original code, which removed the node before measuring —
                # confirm against the reference.
                S[node] = _size(neighbors)
                if heuristic not in ('H1', 'H2'):
                    local = working.subgraph([node] + neighbors)
                    sizes = [_size(clique)
                             for clique in nx.find_cliques(local)]
                    if not sizes:
                        sizes = [_size([node])]
                    M[node] = max(sizes)
                    C[node] = sum(sizes)

            # float() keeps the ratios exact under Python 2 integer division.
            if heuristic == 'H1':
                score = S
            elif heuristic == 'H2':
                score = {key: float(S[key]) / self.cardinality[key]
                         for key in S}
            elif heuristic == 'H3':
                score = {key: S[key] - M[key] for key in S}
            elif heuristic == 'H5':
                score = {key: float(S[key]) / M[key] for key in S}
            elif heuristic == 'H4':
                score = {key: S[key] - C[key] for key in S}
            else:
                score = {key: float(S[key]) / C[key] for key in S}

            node_to_delete = min(score, key=score.get)
            order.append(node_to_delete)
            # Delete the chosen variable: connect its neighbours, drop it.
            working.add_edges_from(itertools.combinations(
                _neighbors(working, node_to_delete), 2))
            working.remove_node(node_to_delete)

    # Replay the deletion order on a fresh copy, collecting every edge
    # between neighbours of a deleted node (the fill-in edges).
    graph_copy = nx.Graph(self.edges())
    edge_set = set()
    for node in order:
        if node not in graph_copy:
            # e.g. an isolated variable named in a user-supplied order
            continue
        for edge in itertools.combinations(list(graph_copy.neighbors(node)), 2):
            graph_copy.add_edge(edge[0], edge[1])
            edge_set.add(edge)
        graph_copy.remove_node(node)

    if inplace:
        for edge in edge_set:
            self.add_edge(edge[0], edge[1])
        return self
    else:
        # assumes self offers a networkx-style copy() — TODO confirm for
        # this class
        triangulated = self.copy()
        for edge in edge_set:
            triangulated.add_edge(edge[0], edge[1])
        return triangulated
# lvl2.append(graphs_len[i]) print "Η λίστα των μεγεθών των κλικών είναι:" # print 'The list of clique sizes is:' print lvl2 print str(" ") print "Ο αριθμός κλίκας (το μέγεθος της μεγαλύτερης κλίκας) του G είναι:", nx.graph_clique_number(G) # print 'The clique number (size of the largest clique) for G is:', nx.graph_clique_number(G) # print sorted(nx.connected_components(G), key = len, reverse=True) print str(" ") print "Το λεξικό των κλικών που περιέχουν κάθε κόμβο είναι:" # print 'The dictionary of the lists of cliques containing each node:' print nx.cliques_containing_node(G) print str(" ") print "Το λεξικό του πλήθους κλικών που περιέχουν κάθε κόμβο είναι:" # print 'The dictionary of the numbers of maximal cliques for each node:' print nx.number_of_cliques(G) print str(" ") print "Το λεξικό του μεγέθους των μεγαλύτερων κλικών που περιέχουν κάθε κόμβο είναι:" # print 'The dictionary of the sizes of the largest maximal cliques containing each given node:' print nx.node_clique_number(G) print str(" ") maxclique = [clq for clq in nx.find_cliques(G) if len(clq) == nx.graph_clique_number(G)] nodes = [n for clq in maxclique for n in clq] H = G.subgraph(nodes)
# Load the pickled follower data, add it to twitter_graph and draw the graph,
# labelling only the nodes of one clique that contains main_user.
# twitter_graph and main_user are defined earlier in the script.
# NOTE(review): pickle.load can execute arbitrary code — only safe if
# twitter_network.dat comes from a trusted source.
with open("twitter_network.dat",mode="rb") as my_file:
    graph_data = pickle.load(my_file)

follower_user_data = graph_data["follower_user_data"]#followers of followers screen_names
followers = graph_data["followers"] #my followers
followers_screen_names = graph_data["followers_screen_names"] #screen_names of my followers
follower_data = graph_data["follower_data"] #followers of followers

# connect the main user to each of its followers
for follower in followers_screen_names:
    twitter_graph.add_edge(main_user,follower["screen_name"].lower())

# NOTE(review): `follower` below is whatever the previous loop left bound (its
# last item) and the key `each` is never used, so every follower-of-follower
# is attached to that single follower — confirm whether `each` was meant to
# identify the edge source. (Loop nesting reconstructed from unindented
# source; verify.)
for each,value in follower_user_data.items():
    for name in value:
        twitter_graph.add_edge(follower["screen_name"].lower(),name["screen_name"].lower())
# #set positions
pos = nx.random_layout(twitter_graph)
plt.figure(figsize=(16,10))
nx.draw_networkx_nodes(twitter_graph,pos,node_size=30)
nx.draw_networkx_edges(twitter_graph,pos,alpha=0.01)
#get the nodes that we want to draw labels for
# with a list for `nodes`, the result is indexed by node below; [0] takes the
# first clique listed for main_user
clique = nx.cliques_containing_node(twitter_graph,nodes=[main_user])
clique = clique[main_user][0]
clique.append(main_user)
# label each selected node with its own name
labels = {}
for name in clique:
    labels[name] = name
nx.draw_networkx_labels(twitter_graph,pos,font_size=16,labels=labels)
plt.show()
def tree_decomposition(input_graph):
    """Build a decomposition tree of ``input_graph`` by repeated vertex removal.

    Works on a copy of the input. While vertices remain, it takes the first
    vertex returned by ``sort_by_degree`` (presumably the one of minimum
    degree — helper defined elsewhere), creates a tree vertex made of that
    vertex plus its current neighbours, and removes one or more vertices from
    the working graph. Tree vertices are linked through
    ``tree_connectivity_dictionary``, which maps each graph vertex to the
    tree vertices still waiting to be connected through it.

    Progress is traced verbosely on standard output.

    NOTE(review): the nesting of some statements was reconstructed from
    source that had lost its indentation; confirm against the original.

    Parameters:
        input_graph: a networkx graph; it is not modified.

    Returns:
        A ``networkx.Graph`` whose nodes are tuples of graph vertices.
    """
    current_graph=input_graph.copy()
    decomposition_tree_vertices=list()  # never used below
    counter=0;  # never used below
    decomposition_tree=nx.Graph()
    # graph vertex -> tree vertices that contain it and await a connection
    tree_connectivity_dictionary=dict()
    for graph_vertex in current_graph.nodes():
        tree_connectivity_dictionary[graph_vertex]=[]
    while current_graph.order()>0:
        print current_graph.order()
        nodes_sorted_by_degree=sort_by_degree(current_graph)
        print 'nodes_sorted_by_degree', nodes_sorted_by_degree
        # first entry of the sorted result; each entry is indexed as a pair
        minimum_degree_vertex=nodes_sorted_by_degree[0][0]
        print 'Minimum Degree_vertex' , minimum_degree_vertex
        cliques_of_minimum_degree_vertex=nx.cliques_containing_node(current_graph,minimum_degree_vertex)
        print 'cliques_of_minimum_degree_vertex',cliques_of_minimum_degree_vertex
        number_of_cliques_containing_vertex=len(cliques_of_minimum_degree_vertex)
        print 'number_of_cliques_containing_vertex', number_of_cliques_containing_vertex
        minimum_degree_vertex_neighbors=current_graph.neighbors(minimum_degree_vertex)
        print 'minimum_degree_vertex_neighbors', minimum_degree_vertex_neighbors
        # the new tree vertex is the tuple (vertex, neighbour, neighbour, ...)
        new_tree_vertex=[minimum_degree_vertex]
        print 'new_tree_vertex First element: ',new_tree_vertex
        new_tree_vertex.extend(minimum_degree_vertex_neighbors)
        new_tree_vertex=tuple(new_tree_vertex)
        decomposition_tree.add_node(new_tree_vertex)
        print 'decomposition_tree_vertices',decomposition_tree.nodes()
        if number_of_cliques_containing_vertex>1:
            # the vertex lies in more than one clique: add an edge for every
            # pair returned by make_pairs (helper defined elsewhere) and
            # remove only the vertex itself
            print 'Not Clique, will remove only one vertex'
            pairs_of_neighbors=make_pairs(minimum_degree_vertex_neighbors)
            print 'pairs_of_neighbors',pairs_of_neighbors
            for additional_edge in pairs_of_neighbors:current_graph.add_edge(additional_edge[0],additional_edge[1])
            toberemoved=[minimum_degree_vertex]
            print 'toberemoved ', toberemoved
        else:
            # the vertex lies in a single clique: also remove every neighbour
            # whose degree equals the vertex's own number of neighbours
            toberemoved=[minimum_degree_vertex]
            print 'Clique detected, will try to remove more than one vertex'
            number_of_clique_edges_per_vertex=len(minimum_degree_vertex_neighbors)
            print 'number_of_clique_edges_per_vertex',number_of_clique_edges_per_vertex
            print 'Checking all the vertex`s neighbors...'
            print 'minimum_degree_vertex_neighbors', minimum_degree_vertex_neighbors
            for temp_vertex in minimum_degree_vertex_neighbors:
                if current_graph.degree(temp_vertex)==number_of_clique_edges_per_vertex:
                    toberemoved.append(temp_vertex)
                    print 'Will ALSO remove vertex ', temp_vertex
        for graph_vertex in new_tree_vertex:
            if graph_vertex in toberemoved:
                current_graph.remove_node(graph_vertex)
                print 'Removed original graph vertex', graph_vertex
                # connect the new tree vertex to every tree vertex that was
                # waiting on the removed graph vertex
                tree_vertices_waiting=tree_connectivity_dictionary[graph_vertex]
                print 'For the removed node, tree_vertices_waiting: ' , tree_vertices_waiting
                for tree_vertex_waiting in tree_vertices_waiting:
                    print 'New Tree vertex: ' , new_tree_vertex
                    print 'Tree Vertex waiting:', tree_vertex_waiting
                    decomposition_tree.add_edge(new_tree_vertex,tree_vertex_waiting)
                    print 'Connected tree vertices', new_tree_vertex, 'and ' , tree_vertex_waiting
                    print 'The tree edges are now: ', decomposition_tree.edges()
                    print 'THE NUMBER OF TREE EDGES ARE NOW: ', len(decomposition_tree.edges())
                # NOTE(review): tree_vertices_waiting is the same list object
                # as a dictionary entry that may be modified by .remove()
                # below while it is being iterated, which can skip elements;
                # the inner loop also rebinds graph_vertex — confirm intent
                for tree_vertex_waiting in tree_vertices_waiting:
                    common_graph_nodes_between_tree_vertices=list(my_very_simple_tuple_intersection(new_tree_vertex,tree_vertex_waiting))
                    for graph_vertex in common_graph_nodes_between_tree_vertices:
                        tree_connectivity_dictionary[graph_vertex].remove(tree_vertex_waiting)
                        print 'Removed from dictionary entry', graph_vertex , 'tree node ', tree_vertex_waiting
                        print 'Now the new dictionary is: ' , tree_connectivity_dictionary
            else:
                # vertex stays in the graph: remember that the new tree vertex
                # contains it
                tree_connectivity_dictionary[graph_vertex].append(new_tree_vertex)
                print 'New tree_connectivity_dictionary node appended. New tree_connectivity_dictionary ', tree_connectivity_dictionary
    print 'tree_connectivity_dictionary: ' , tree_connectivity_dictionary
    print 'decomposition_tree.nodes: ', decomposition_tree.nodes()
    print 'decomposition_tree.edges: ', decomposition_tree.edges()
    return decomposition_tree