def printStatistics():
    # Retweet and reply triad census per group, printed as a Markdown table
    print()
    for i in range(ClusteringTweet.tweetDataFrame.groupby('group').nunique().shape[0]):
        tabletitleString = "|Group Number|Interaction Type|"
        tabletitleBottom = "|:------------:|:------------:|"
        # header columns: the 16 triad type codes
        for triadic_type in nx.triadic_census(
                HashtagInteractionGraph.nxGraphSeperatedByGroup[1]["retweet"]).keys():
            tabletitleString += triadic_type + "|"
            tabletitleBottom += ":-----------------:|"
        print(tabletitleString)
        print(tabletitleBottom)

        tabletitlerow = "|" + str(i) + "|retweet|"
        triadic_census = nx.triadic_census(
            HashtagInteractionGraph.nxGraphSeperatedByGroup[i]["retweet"]).values()
        for value in triadic_census:
            tabletitlerow += str(value) + "|"
        print(tabletitlerow)

        tabletitlerow = "|" + str(i) + "|reply|"
        triadic_census = nx.triadic_census(
            HashtagInteractionGraph.nxGraphSeperatedByGroup[i]["reply"]).values()
        for value in triadic_census:
            tabletitlerow += str(value) + "|"
        print(tabletitlerow)
        print()
    print()
def get_similar_topics(graph_1, graph_2):
    c1 = paint_communities(graph_1, paint=False)
    c2 = paint_communities(graph_2, paint=False)
    com1 = from_DtoD(c1)
    com2 = from_DtoD(c2)
    # only for directed graphs, hence the to_directed() calls
    triads1 = [(c, nx.triadic_census(graph_1.subgraph(com1[c]).to_directed()).values())
               for c in com1.keys()]
    triads2 = [(c, nx.triadic_census(graph_2.subgraph(com2[c]).to_directed()).values())
               for c in com2.keys()]

    best_com_pairs = []
    for c, vector in triads1:
        nparray = np.array(list(vector))
        suma = nparray.sum()
        # (community, L1 distance between census vectors, combined census size)
        aux = [(k, abs(nparray - np.array(list(v2))).sum(), suma + np.array(list(v2)).sum())
               for k, v2 in triads2]
        # keep the pairs whose relative census distance is at most 0.5
        aux = [(graph_1.subgraph(com1[c]), graph_2.subgraph(com2[k]))
               for k, v, s in aux if (s != 0 and v / s <= 0.5) or s == 0]
        best_com_pairs.extend(aux)

    # Alternative approach kept for reference: compare communities by the
    # Hamming distance between their adjacency matrices.
    # all_ady_mtrx = []
    # for k, v in com1.items():
    #     A = nx.adjacency_matrix(graph_1, v)
    #     all_ady_mtrx.append((k, A.todense()))
    # all_ady_mtrx.sort(key=lambda elem: len(elem[1]))
    #
    # best_com_pairs = []
    # for k, v in com2.items():
    #     A = nx.adjacency_matrix(graph_2, v).todense()
    #     best_c, best_ady = get_closest_ady(all_ady_mtrx, len(A))
    #     dist = hamming(best_ady, A)
    #     max_edges = count_ones(A) + count_ones(best_ady)
    #     if dist / max_edges <= 0.6:
    #         best_com_pairs.append((graph_2.subgraph(com2[k]), graph_1.subgraph(com1[best_c])))

    return best_com_pairs
def test_triadic_census():
    """Tests the triadic_census function."""
    G = nx.DiGraph()
    G.add_edges_from(["01", "02", "03", "04", "05", "12", "16", "51", "56", "65"])
    expected = {
        "030T": 2, "120C": 1, "210": 0, "120U": 0, "012": 9, "102": 3,
        "021U": 0, "111U": 0, "003": 8, "030C": 0, "021D": 9, "201": 0,
        "111D": 1, "300": 0, "120D": 0, "021C": 2,
    }
    actual = nx.triadic_census(G)
    assert expected == actual
def get_tc(csv_name):
    """
    Compute the triad census of the graph described by the uploaded file.

    :param csv_name: CSV filename
    :type csv_name: String
    :return: list of dicts with keys 'tn' (triad name) and 'count' (triad count)
    :rtype: list
    """
    G = nx.MultiDiGraph()  # create a multi-digraph
    with open(csv_name) as csv_file:  # open the file
        _ = csv_file.readline()  # ignore the first line (header)
        for line in csv_file.readlines():  # iterate over the whole file
            line = line.rstrip()  # drop the trailing "\n"
            nodo1, nodo2 = line.split(",")  # split the two values into two variables
            if not G.has_node(nodo1):  # add the first node if not already in the graph
                G.add_node(nodo1)
            if not G.has_node(nodo2):  # add the second node if not already in the graph
                G.add_node(nodo2)
            G.add_edge(nodo1, nodo2)  # create the edge between the two nodes
    final_triad = nx.triadic_census(G)
    return sorted([{'tn': k, 'count': v} for k, v in final_triad.items()],
                  key=lambda k: k['tn'])
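A minimal usage sketch for get_tc, assuming a hypothetical "edges.csv" with a header row followed by "source,target" pairs (the header is skipped by get_tc's readline() call):

import csv

# build a tiny hypothetical input file
with open("edges.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["source", "target"])  # header, ignored by get_tc
    writer.writerows([("a", "b"), ("b", "c"), ("a", "c")])

for row in get_tc("edges.csv"):
    print(row["tn"], row["count"])  # one line per triad type, sorted by name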
def test_triadic_census_nodelist():
    """Tests the nodelist argument of the triadic_census function."""
    G = nx.DiGraph()
    G.add_edges_from(["01", "02", "03", "04", "05", "12", "16", "51", "56", "65"])
    expected = {
        "030T": 2, "120C": 1, "210": 0, "120U": 0, "012": 9, "102": 3,
        "021U": 0, "111U": 0, "003": 8, "030C": 0, "021D": 9, "201": 0,
        "111D": 1, "300": 0, "120D": 0, "021C": 2,
    }
    actual = {k: 0 for k in expected}
    for node in G.nodes():
        node_triad_census = nx.triadic_census(G, nodelist=[node])
        for triad_key in expected:
            actual[triad_key] += node_triad_census[triad_key]
    # Divide the total count of 003 triads by 3, since we are counting them thrice
    actual["003"] //= 3
    assert expected == actual
def triads(self):
    rslt = {}
    if self.directed == 'directed':
        rslt['triadic_census'] = nx.triadic_census(self.graph)
    fname_triads = self.DIR + '/triads.json'
    with open(fname_triads, "w") as f:
        json.dump(rslt, f, cls=SetEncoder, indent=2)
    print(fname_triads)
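SetEncoder is referenced above but not shown; a minimal stand-in (an assumption, not the original class) that makes sets JSON-serializable could look like this:

import json

class SetEncoder(json.JSONEncoder):
    # hypothetical helper: serialize sets as sorted lists
    def default(self, obj):
        if isinstance(obj, set):
            return sorted(obj)
        return super().default(obj)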
def directed_triadic_census(graph, file_name, save_graphs):
    # Calculate census
    print("\nCalculating triadic census...")
    triadic_census = nx.triadic_census(graph)
    print("Done!\n")

    # triad codes paired with their human-readable edge patterns
    patterns = [
        ("021D", "A<-B->C"), ("021U", "A->B<-C"), ("021C", "A->B->C"),
        ("111D", "A<->B<-C"), ("111U", "A<->B->C"), ("030T", "A->B<-C,A->C"),
        ("030C", "A<-B<-C,A->C"), ("201", "A<->B<->C"), ("120D", "A<-B->C,A<->C"),
        ("120U", "A->B<-C,A<->C"), ("120C", "A->B->C,A<->C"),
        ("210", "A->B<->C,A<->C"), ("300", "A<->B<->C,A<->C"),
    ]

    # Output to console
    for code, label in patterns:
        print("%s triads: %d" % (label, triadic_census[code]))

    # Output to graph
    if save_graphs == 1:
        values = [triadic_census[code] for code, _ in patterns]
        labels = [label for _, label in patterns]
        plt.figure()
        plt.bar(range(len(values)), values, align='center', alpha=0.5)
        plt.xticks(range(len(labels)), labels, rotation=60)
        plt.savefig("graphs/" + file_name + ".png")
def no_triads_per_type(self):
    """
    :return: The triad count for each triad type in the graph, normalized
        by the number of possible node triples, C(n, 3), where n is the
        number of nodes in the graph.
    """
    triad_dict = nx.triadic_census(self.graph)
    return {
        triad_type: triad_dict[triad_type] / binom(self.no_nodes, 3)
        for triad_type in triad_dict.keys()
    }
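Why binom(n, 3): a directed graph on n nodes has exactly C(n, 3) node triples, and triadic_census partitions all of them into the 16 types, so the normalized values sum to 1. A quick self-contained check of that property (the graph here is illustrative):

import networkx as nx
from scipy.special import binom

G = nx.gnp_random_graph(20, 0.2, directed=True, seed=1)
census = nx.triadic_census(G)
n = G.number_of_nodes()
normalized = {t: c / binom(n, 3) for t, c in census.items()}
assert abs(sum(normalized.values()) - 1.0) < 1e-9  # the 16 shares cover all triples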
def triadSignificanceProfile(G, triad_cfg):
    """
    Compute the significance profile of the patterns mapped in triad_cfg,
    inside directed graph G.

    - G : directed graph representing the network;
    - triad_cfg : dict mapping interesting triadic pattern codes, as in
      nx.triadic_census(), to explicit names
      (e.g. triad_cfg = {'003': 'Null', '012': 'Single-edge'}).
    """
    census = nx.triadic_census(G)
    in_degree_sequence = [d for n, d in G.in_degree()]    # in-degree sequence
    out_degree_sequence = [d for n, d in G.out_degree()]  # out-degree sequence

    # Census of 100 degree-preserving random networks. The configuration
    # model returns a MultiDiGraph, so collapse it to a simple DiGraph
    # before taking the census.
    random_nets_census = []
    for i in range(100):
        rand_G = nx.DiGraph(nx.directed_configuration_model(
            in_degree_sequence, out_degree_sequence, seed=i))
        random_nets_census.append(nx.triadic_census(rand_G))

    real_census, random_census = mapTriadCodes(census, random_nets_census, triad_cfg)

    # z-score of each pattern against the random ensemble
    z_score = []
    for p in real_census.keys():
        N_real_p = real_census[p]
        N_rand_p = np.mean(random_census[p])
        std = np.std(random_census[p])
        z_p = (N_real_p - N_rand_p) / std if std != 0 else 0
        z_score.append(z_p)

    # normalize the z-score vector to unit length
    SP = []
    z_norm = np.linalg.norm(z_score)
    for z in z_score:
        norm_z_score = z / z_norm if z_norm != 0 else z
        SP.append(round(norm_z_score, 4))
    return SP
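A hypothetical call, assuming the mapTriadCodes helper from the same module is importable; the returned list holds one normalized z-score per pattern in triad_cfg, in the order mapTriadCodes yields them:

import networkx as nx

triad_cfg = {'021C': 'Pass-along', '030T': 'Transitive', '030C': 'Cycle'}
G = nx.gnp_random_graph(30, 0.15, directed=True, seed=42)
profile = triadSignificanceProfile(G, triad_cfg)
print(dict(zip(triad_cfg.values(), profile)))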
def get_dyad_triad_census(G, prefix=""): dyads_census = dyadic_census(G) dyads_census = {f"{prefix}_D_{k}": v for k, v in dyads_census.items()} triads_census = networkx.triadic_census(G) triads_census = {f"{prefix}_T_{k}": v for k, v in triads_census.items()} census = {**dyads_census, **triads_census} return census
def run(self):
    motifs = nx.triadic_census(self.graph)
    with self.lock:
        # accumulate per-motif counts across workers
        for motif, count in motifs.items():
            try:
                self.output[motif] += count
            except KeyError:
                self.output[motif] = count
def extract_triads(play: dict, tris: list):
    DG = nx.DiGraph(big_dimat_df(play))
    ds = nx.triadic_census(DG)
    combs = itertools.combinations(DG.nodes, 3)
    res = []
    for u, v, w in combs:
        triname = nx.algorithms.triads.TRICODE_TO_NAME[
            nx.algorithms.triads._tricode(DG, u, v, w)]
        if triname in tris:
            res.append(((u, v, w), triname))
    return res
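big_dimat_df is project-specific and not shown; to exercise just the per-triple lookup used above, a plain DiGraph works. Note that _tricode and TRICODE_TO_NAME are private NetworkX internals, so this sketch relies on the same version-specific API as extract_triads:

import itertools
import networkx as nx

DG = nx.DiGraph([("a", "b"), ("b", "c"), ("a", "c")])
for u, v, w in itertools.combinations(DG.nodes, 3):
    code = nx.algorithms.triads._tricode(DG, u, v, w)
    print((u, v, w), nx.algorithms.triads.TRICODE_TO_NAME[code])  # e.g. 030T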
def oper_num(num):
    # Build one degree-preserving random digraph. The configuration model
    # returns a MultiDiGraph; collapse it to a simple DiGraph so the edge
    # count and the dyad arithmetic below are well defined.
    g1 = nx.DiGraph(nx.directed_configuration_model(in_degree_sequence,
                                                    out_degree_sequence))
    tmp_dic = nx.triadic_census(g1)
    recip_ratio = nx.overall_reciprocity(g1)
    total_nodes = g1.number_of_nodes()
    total_edges = g1.number_of_edges()
    double_link = recip_ratio * total_edges
    single_dyad = total_edges - double_link
    mutual_dyad = double_link / 2
    null_dyad = total_nodes * (total_nodes - 1) / 2 - single_dyad - mutual_dyad
    tmp_dic['null_dyad'] = null_dyad
    tmp_dic['single_dyad'] = single_dyad
    tmp_dic['mutual_dyad'] = mutual_dyad
    return tmp_dic
def triadic_census(self):
    return nx.triadic_census(self.G)
def network_analysis(storage_collection, plot_data=True):
    # collect all mentions with relation to users
    user_mentions_tweets = {}
    user_mentions_retweets = {}
    user_mentions_quotes = {}
    # collect all hashtags with relation to other hashtags
    user_hashtags_tweets = []
    user_hashtags_retweets = []
    user_hashtags_quotes = []
    # collect all dates when a mention was created
    dates_mentions_tweets = []
    dates_mentions_retweets = []
    dates_mentions_quotes = []

    # collect the appropriate data from each tweet/retweet/quote
    for data in storage_collection.find():
        if data['is_quote']:
            # track when there is a mention in a quote
            if data['mentions']:
                dates_mentions_quotes.extend(
                    [data['created'] for term in data["mentions"]])
            # append pairs of hashtags in a quote
            user_hashtags_quotes.extend([(x['text'], y['text'])
                                         for x in data['hashtags']
                                         for y in data['hashtags']
                                         if x['text'] != y['text']])
            # append data about the user mentions for the user who created the quote
            if data['username'] in user_mentions_quotes:
                user_mentions_quotes[data['username']] += [
                    term['screen_name'] for term in data["mentions"]
                ]
            else:
                user_mentions_quotes[data['username']] = [
                    term['screen_name'] for term in data["mentions"]
                ]
        elif data['is_retweet']:
            # track when there is a mention in a retweet
            if data['mentions']:
                dates_mentions_retweets.extend(
                    [data['created'] for term in data["mentions"]])
            # append pairs of hashtags in a retweet
            user_hashtags_retweets.extend([(x['text'], y['text'])
                                           for x in data['hashtags']
                                           for y in data['hashtags']
                                           if x['text'] != y['text']])
            # append data about the user mentions for the user who created the retweet
            if data['username'] in user_mentions_retweets:
                user_mentions_retweets[data['username']] += [
                    term['screen_name'] for term in data["mentions"]
                ]
            else:
                user_mentions_retweets[data['username']] = [
                    term['screen_name'] for term in data["mentions"]
                ]
        else:
            # track when there is a mention in a tweet
            if data['mentions']:
                dates_mentions_tweets.extend(
                    [data['created'] for term in data["mentions"]])
            # append pairs of hashtags in a tweet
            user_hashtags_tweets.extend([(x['text'], y['text'])
                                         for x in data['hashtags']
                                         for y in data['hashtags']
                                         if x['text'] != y['text']])
            # append data about the user mentions for the user who created the tweet
            if data['username'] in user_mentions_tweets:
                user_mentions_tweets[data['username']] += [
                    term['screen_name'] for term in data["mentions"]
                ]
            else:
                user_mentions_tweets[data['username']] = [
                    term['screen_name'] for term in data["mentions"]
                ]

    # count the frequencies of mentions per user for each group (tweet, retweet, quote)
    user_interactions_tweets = {}
    user_interactions_retweets = {}
    user_interactions_quotes = {}
    for keys, values in user_mentions_tweets.items():
        user_interactions_tweets[keys] = Counter(values)
    for keys, values in user_mentions_retweets.items():
        user_interactions_retweets[keys] = Counter(values)
    for keys, values in user_mentions_quotes.items():
        user_interactions_quotes[keys] = Counter(values)

    # remove duplicates from the hashtag interaction lists
    user_hashtags_retweets = list(set(user_hashtags_retweets))
    user_hashtags_tweets = list(set(user_hashtags_tweets))
    user_hashtags_quotes = list(set(user_hashtags_quotes))

    # option from argument to plot data (when False the program runs faster)
    if plot_data:
        # plot the data for user interactions
        plot_directed_graph(
            user_interactions_tweets,
            "Interaction between user mentions in tweets",
            str(storage_collection.name) + "_user_interactions_tweets")
        plot_directed_graph(
            user_interactions_retweets,
            "Interaction between user mentions in retweets",
            str(storage_collection.name) + "_user_interactions_retweets")
        plot_directed_graph(
            user_interactions_quotes,
            "Interaction between user mentions in quotes",
            str(storage_collection.name) + "_user_interactions_quotes")
        # plot the data for hashtag interactions
        plot_undirected_graph(
            user_hashtags_tweets, "Interaction between hashtags in tweets",
            str(storage_collection.name) + "_hashtags_interactions_tweets")
        plot_undirected_graph(
            user_hashtags_retweets, "Interaction between hashtags in retweets",
            str(storage_collection.name) + "_hashtags_interactions_retweets")
        plot_undirected_graph(
            user_hashtags_quotes, "Interaction between hashtags in quotes",
            str(storage_collection.name) + "_hashtags_interactions_quotes")

        # plot when the mentions were created
        for item, for_what in [(dates_mentions_tweets, "tweets"),
                               (dates_mentions_retweets, "retweets"),
                               (dates_mentions_quotes, "quotes")]:
            # a list of 1s to count the mentions
            ones = [1] * len(item)
            # the index of the series
            idx = pandas.DatetimeIndex(item)
            # the actual series (a series of 1s for the moment)
            mentions_data = pandas.Series(ones, index=idx)
            # resampling / bucketing per day
            per_day = mentions_data.resample('D').sum().fillna(0)
            print(per_day)
            # plot data
            plot_time_results(per_day, "Number of ties in a day, for " + for_what,
                              "Day", "Count",
                              str(storage_collection.name) + "_ties_" + for_what)

    # collect all triads and dyads for tweets, retweets and quotes
    dyads = {}
    triads = {}
    for data, for_what in [(user_interactions_tweets, "tweets"),
                           (user_interactions_retweets, "retweets"),
                           (user_interactions_quotes, "quotes")]:
        # create graph to analyze triads
        G = nx.DiGraph()
        for key, value in data.items():
            for item in value.items():
                G.add_edges_from([(key, item[0])], weight=item[1])
        triad_dict = nx.triadic_census(G)

        # create list of edges between users
        tuples = []
        for key, value in data.items():
            for item in value.items():
                tuples.append((key, item[0]))
        # create graph to analyze ties
        G = ig.Graph.TupleList(tuples, directed=True)
        # get all dyads
        dc = G.dyad_census()
        dyad_dict = dc.as_dict()

        # append results
        dyads[for_what] = dyad_dict
        triads[for_what] = triad_dict

    print("Tweets mentions: ", user_interactions_tweets)
    print("Retweets mentions: ", user_interactions_retweets)
    print("Quotes mentions: ", user_interactions_quotes)
    print()
    print("Tweets hashtags: ", user_hashtags_tweets)
    print("Retweets hashtags: ", user_hashtags_retweets)
    print("Quotes hashtags: ", user_hashtags_quotes)
    print()
    print("Tweets ties count: ", len(dates_mentions_tweets))
    print("Retweets ties count: ", len(dates_mentions_retweets))
    print("Quotes ties count: ", len(dates_mentions_quotes))
    print()
    print("Number of dyads: ", dyads)
    print("Number of triads: ", triads)

    # return data to be saved in the MongoDB log
    results = [
        user_interactions_tweets, user_interactions_retweets,
        user_interactions_quotes, user_hashtags_tweets, user_hashtags_retweets,
        user_hashtags_quotes, dates_mentions_tweets, dates_mentions_retweets,
        dates_mentions_quotes, dyads, triads
    ]
    return results
def parse_all_metrics(api, edge_df, user_id, directory=None, long=False):
    '''
    Get all Tier 3 metrics for a user_id.

    Parameters
    ----------
    api : Tweepy API hook
    edge_df : Edgelist of Pandas DataFrame
    user_id : User ID string
    directory : Directory to look for data. The default is None.
    long : If True, skip the metrics that take a long time. The default is False.

    Returns
    -------
    Feature Data Frame
    '''
    import pandas as pd
    import time
    import networkx as nx
    from collections import Counter
    import community
    import numpy as np

    G = nx.from_pandas_edgelist(edge_df, 'from', 'to', edge_attr=['type'],
                                create_using=nx.DiGraph())
    G2 = G.to_undirected()
    # nx.connected_component_subgraphs was removed in networkx 2.4;
    # build the largest-component subgraph directly instead
    largest_component = G2.subgraph(max(nx.connected_components(G2), key=len))
    print("Nodes in largest compo:", len(largest_component.nodes))

    data = {
        "user_id": [],
        "scrape_date": [],
        "num_nodes": [],
        "num_links": [],
        "density": [],
        "isolates": [],
        "dyad_isolates": [],
        "triad_isolates": [],
        "compo_over_4": [],
        # "average_shortest_path_length": [],
        "clustering_coefficient": [],
        "transitivity": [],
        # "network_diameter": [],
        "reciprocity": [],
        "graph_degree_centrality": [],
        "graph_betweenness_centrality": [],
        "mean_eigen_centrality": [],
        "simmelian_ties": [],
        "triad_003": [], "triad_012": [], "triad_102": [],
        "triad_021D": [], "triad_021U": [], "triad_021C": [],
        "triad_111D": [], "triad_111U": [], "triad_030T": [],
        "triad_030C": [], "triad_201": [], "triad_120D": [],
        "triad_120U": [], "triad_120C": [], "triad_210": [],
        "triad_300": [],
        "num_louvain_groups": [],
        "size_largest_louvain_group": [],
        "ego_effective_size": []
    }

    if long:
        data.pop("graph_betweenness_centrality")
        data.pop("ego_effective_size")
        data.pop("simmelian_ties")

    data['user_id'].append(user_id)
    data['scrape_date'].append(time.strftime('%Y%m%d-%H%M%S'))
    data['num_nodes'].append(nx.number_of_nodes(G))
    data['num_links'].append(nx.number_of_edges(G))
    data['density'].append(nx.density(G))

    compo_sizes = [len(c) for c in
                   sorted(nx.connected_components(G2), key=len, reverse=True)]
    compo_freq = Counter(compo_sizes)
    data['isolates'].append(compo_freq[1])
    data['triad_isolates'].append(compo_freq[3])
    data['dyad_isolates'].append(compo_freq[2])
    data['compo_over_4'].append(len([x for x in compo_sizes if x > 3]))
    # data['average_shortest_path_length'].append(nx.average_shortest_path_length(largest_component))
    data['clustering_coefficient'].append(nx.average_clustering(G2))
    data['transitivity'].append(nx.transitivity(G))
    # data['network_diameter'].append(nx.diameter(largest_component))
    data['reciprocity'].append(nx.reciprocity(G))

    if not long:
        if user_id in list(G.nodes):
            ef = nx.effective_size(G, nodes=[user_id])
            data['ego_effective_size'].append(ef[user_id])
        else:
            data['ego_effective_size'].append(0)

    data['graph_degree_centrality'].append(graph_centrality(G, kind='degree'))
    if not long:
        data['graph_betweenness_centrality'].append(
            graph_centrality(largest_component, kind='betweenness'))

    try:
        eig = list(nx.eigenvector_centrality_numpy(G).values())
        data['mean_eigen_centrality'].append(np.mean(eig))
    except Exception:
        data['mean_eigen_centrality'].append(0)

    if not long:
        data['simmelian_ties'].append(get_simmelian_ties(G, sparse=True))

    # triadic census: each feature gets the count of its own triad type
    census = nx.triadic_census(G)
    for triad_type in ['003', '012', '102', '021D', '021U', '021C',
                       '111D', '111U', '030T', '030C', '201', '120D',
                       '120U', '120C', '210', '300']:
        data['triad_' + triad_type].append(census[triad_type])

    partition = community.best_partition(G2)
    p_df = pd.DataFrame.from_dict(partition, orient='index')
    data['num_louvain_groups'].append(len(set(partition.values())))
    data['size_largest_louvain_group'].append(p_df[0].value_counts().max())

    df = pd.DataFrame(data)
    return df
graph_centrality = nx.degree_centrality(largest_subgraph)
max_degree = max(graph_centrality.items(), key=itemgetter(1))
graph_closeness = nx.closeness_centrality(largest_subgraph)
max_closeness = max(graph_closeness.items(), key=itemgetter(1))
graph_betweenness = nx.betweenness_centrality(largest_subgraph,
                                              normalized=True, endpoints=False)
max_bet = max(graph_betweenness.items(), key=itemgetter(1))

# Convert the graph to directed form to get the triad census
triad_census = nx.triadic_census(graph.to_directed())
print(triad_census)

node_and_degree = largest_subgraph.degree()
colors_central_nodes = ['blue', 'red']
central_nodes = [max_degree[0], max_closeness[0]]
pos = nx.spring_layout(largest_subgraph, k=0.05)
plt.figure(figsize=(20, 20))
nx.draw(largest_subgraph,
        pos=pos,
        edge_color="black",
        linewidths=0.3,
        node_size=60,
        alpha=0.6,
    metrics_file.write(
        "Todes network average cluster coeff: {} \n\n".format(t_cluster_coeff))

# ---------------------------------------------------------------------------- #
# NETWORK DENSITY
# ---------------------------------------------------------------------------- #
logging.info("calculating network density")
with open("network_metrics.txt", 'a') as metrics_file:
    metrics_file.write("Latinx Density: {}\n\n".format(nx.density(latinx_g)))
    metrics_file.write("Todes Density: {}\n\n".format(nx.density(todes_g)))

# ---------------------------------------------------------------------------- #
# TRIADIC CENSUS
# ---------------------------------------------------------------------------- #
logging.info("calculating triadic census for latinx & todes")
lx_triad_census = nx.triadic_census(latinx_g)
te_triad_census = nx.triadic_census(todes_g)
with open("network_metrics.txt", 'a') as metrics_file:
    metrics_file.write("Latinx Triadic Census:\n")
    for k, v in lx_triad_census.items():
        metrics_file.write(str(k) + ' : ' + str(v) + "\n")
    metrics_file.write("\n\n")
    metrics_file.write("Todes Triadic Census:\n")
    for k, v in te_triad_census.items():
        metrics_file.write(str(k) + ' : ' + str(v) + "\n")
    metrics_file.write("\n\n")

# ---------------------------------------------------------------------------- #
# DUMP network_metrics.txt to GCP Cloud Storage
member.expertise[member.tasks[0]] <= 0.5: if any(G.successors(member)): for expert in G.successors(member): if G[member][expert]['color'] == \ colors[member.tasks[0]] and \ member not in expert.waitlist: expert.waitlist.append(member) # for those who cannot do his own tasks, check if he can help others for member in team: if member.tasks != [] and \ member.availability and \ any(member.waitlist): del member.waitlist[0].tasks[0] del member.waitlist[0] steps = steps + 1 with open("output.csv", 'w') as csvfile: writer = csv.writer(csvfile) for n in range(1000): G, team, number_of_areas, number_of_agents = create_TMS(10, 5, 10) productivity_TMS(10) for i in team: result = writer.writerow([i.tertius, i.steps]) ''' TMS triadic census ''' nx.triadic_census(G)
def inspect_graph(dataset):
    divider = '\n----------------------------------------------------------------------------------------------------------------------\n'
    g = nx.read_edgelist(dataset, create_using=nx.Graph(), nodetype=int)
    directed_g = nx.read_edgelist(dataset, create_using=nx.DiGraph(), nodetype=int)

    print(divider)
    print('GRAPH INFORMATION for \n' + dataset)
    print(nx.info(g))
    n = g.number_of_nodes()

    print(divider)
    print('DEGREE CENTRALITY\n')
    print(nx.degree_centrality(g))

    print(divider)
    print('EIGEN VECTOR CENTRALITY\n')
    print(nx.eigenvector_centrality(g))

    print(divider)
    print('KATZ CENTRALITY\n')
    print(nx.katz_centrality(g))

    print(divider)
    print('PAGERANK\n')
    print(nx.pagerank(g))

    print(divider)
    print('BETWEENNESS CENTRALITY\n')
    print(nx.betweenness_centrality(g))

    print(divider)
    print('CLOSENESS CENTRALITY\n')
    print(nx.closeness_centrality(g))

    print(divider)
    print('LOCAL CLUSTERING COEFFICIENT\n')
    print(nx.clustering(g))

    print(divider)
    print('AVERAGE CLUSTERING COEFFICIENT\n')
    print(nx.average_clustering(g))

    print(divider)
    print('GLOBAL CLUSTERING COEFFICIENT\n')
    # closed triads contain a triangle; open triads have exactly two connected pairs
    triangles_set = {'210', '300', '120C', '030C', '120U', '030T', '120D'}
    open_triads_set = {'111D', '201', '021D', '111U', '021U', '021C'}
    triads_dict = nx.triadic_census(directed_g)
    open_triads = 0
    triangles = 0
    for key in triads_dict:
        if key in open_triads_set:
            open_triads += triads_dict[key]
        if key in triangles_set:
            triangles += triads_dict[key]
    GCC = (3 * triangles) / ((3 * triangles) + open_triads)
    print(GCC)

    print(divider)
    print('RECIPROCITY\n')
    print(nx.overall_reciprocity(g))

    print(divider)
    print('TRANSITIVITY\n')
    print(nx.transitivity(g))

    print(divider)
    print('GIANT COMPONENT\n')
    n_g = 0
    for component in nx.connected_components(g):
        n_g = max(n_g, len(component))
    print('Size of the giant component - ' + str(n_g))

    print(divider)
    print('PLOT\n')
    x = []
    y = []
    k = 0
    while k <= 5:
        x.append(k)
        p = k / n
        g_random = nx.gnp_random_graph(n, p)
        random_n = g_random.number_of_nodes()
        random_n_g = 0
        for component in nx.connected_components(g_random):
            random_n_g = max(random_n_g, len(component))
        y.append(random_n_g / random_n)
        k += 0.1
    plot(x, y, "Average Degree", "N_G/N ratio")

    print(divider)
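The census-based GCC above counts a triangle for any triad type whose three pairs are all connected, and an open triple for any type with exactly two connected pairs. A sanity-check sketch (graph and names illustrative): on an undirected graph viewed as a digraph, the same formula should agree with nx.transitivity.

import networkx as nx

g = nx.gnp_random_graph(15, 0.25, seed=3)
census = nx.triadic_census(g.to_directed())
closed = sum(census[t] for t in ('210', '300', '120C', '030C', '120U', '030T', '120D'))
open_ = sum(census[t] for t in ('111D', '201', '021D', '111U', '021U', '021C'))
gcc = 3 * closed / (3 * closed + open_)
assert abs(gcc - nx.transitivity(g)) < 1e-9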
def main():
    ############################################################
    # Read the twitter data and convert it into a digraph
    di_graph = nx.read_edgelist("twitter_combined.txt",
                                create_using=nx.DiGraph(), nodetype=int)
    # Get the number of nodes in di_graph
    num_of_nodes = di_graph.number_of_nodes()

    ############################################################
    # Arrays later used for plotting
    copeland_scores = []
    degree_ratios = []
    c_centralities = []
    c_centralities_dict = {}
    b_centralities = []
    b_centralities_dict = {}

    ############################################################
    # Calculate Copeland Score and Degree Ratio
    for g in di_graph:
        in_degree = di_graph.in_degree(g)
        out_degree = di_graph.out_degree(g)
        copeland = out_degree - in_degree
        degree = round((out_degree + 1) / (in_degree + 1), 3)
        copeland_scores.append(copeland)
        degree_ratios.append(degree)

    ############################################################
    # Plot Copeland Score and Degree Ratio histograms
    plot_degree_dist_2(copeland_scores, 'Copeland Score Histogram')
    plot_degree_dist_2(degree_ratios, 'Degree Ratio Histogram')

    ############################################################
    # Plot Betweenness Centrality histogram
    # Calculate betweenness centralities using a networkx function
    b_centrality_dict = nx.betweenness_centrality(di_graph, k=int(num_of_nodes / 16))

    # Write betweenness centrality values to a file; the context manager
    # ensures the file is flushed and closed before it is read back
    with open('b_centrality_values.txt', 'w') as f:
        for key, val in b_centrality_dict.items():
            f.write(str(key) + ' ' + str(val) + '\n')

    # Read betweenness centrality values from the file
    with open('b_centrality_values.txt', 'r') as b_file:
        for l in b_file.readlines():
            el = l.split()
            b_centralities.append(float(el[1]))
            b_centralities_dict[el[0]] = el[1]

    for i in range(0, len(b_centralities)):
        print(b_centralities[i])
    plot_degree_dist_2(b_centralities, 'Betweenness Centrality Histogram')

    ############################################################
    # Plot Closeness Centrality histogram
    # Calculate closeness centralities using a networkx function
    c_centrality_dict = nx.closeness_centrality(di_graph)

    # Write closeness centrality values to a file
    with open('c_centrality_values.txt', 'w') as f:
        for key, val in c_centrality_dict.items():
            f.write(str(key) + ' ' + str(val) + '\n')

    # Read closeness centrality values from the file
    with open('c_centrality_values.txt', 'r') as c_file:
        for l in c_file.readlines():
            el = l.split()
            c_centralities.append(float(el[1]))
            c_centralities_dict[el[0]] = el[1]

    plot_degree_dist_2(c_centralities, 'Closeness Centrality Histogram')

    ############################################################
    # Mean, median, and SD of Degree Ratio, Copeland Score, and
    # Closeness Centrality for the nodes with highest Betweenness Centrality
    b_centralities_high = []
    copeland_scores_high_bc = []
    degree_ratios_high_bc = []
    c_centralities_high_bc = []
    for key, val in b_centralities_dict.items():
        if float(val) > 0.00002:
            b_centralities_high.append(key)
    for g in di_graph:
        # membership test instead of index(): index() raises for absent
        # nodes and is falsy for the node at position 0
        if str(g) in b_centralities_high:
            in_degree = di_graph.in_degree(g)
            out_degree = di_graph.out_degree(g)
            copeland = out_degree - in_degree
            degree = round((out_degree + 1) / (in_degree + 1), 3)
            copeland_scores_high_bc.append(copeland)
            degree_ratios_high_bc.append(degree)
            c_centralities_high_bc.append(float(c_centralities_dict[str(g)]))

    # Mean, median, and SD of Copeland Score
    mean_copeland = mean(copeland_scores_high_bc)
    median_copeland = median(copeland_scores_high_bc)
    std_dev_copeland = stdev(copeland_scores_high_bc)
    print(mean_copeland)
    print(median_copeland)
    print(std_dev_copeland)
    print(copeland_scores_high_bc)
    print()
    print()
    print()

    # Mean, median, and SD of Degree Ratio
    mean_degree = mean(degree_ratios_high_bc)
    median_degree = median(degree_ratios_high_bc)
    std_dev_degree = stdev(degree_ratios_high_bc)
    print(mean_degree)
    print(median_degree)
    print(std_dev_degree)
    print(degree_ratios_high_bc)
    print()
    print()
    print()

    # Mean, median, and SD of Closeness Centrality
    mean_closeness = mean(c_centralities_high_bc)
    median_closeness = median(c_centralities_high_bc)
    std_dev_closeness = stdev(c_centralities_high_bc)
    print(mean_closeness)
    print(median_closeness)
    print(std_dev_closeness)
    print(c_centralities_high_bc)

    ############################################################
    # Triadic Census
    print(nx.triadic_census(di_graph))
def networkRandom(numNodes, Degree):
    # numNodes = random.randint(1, 100)
    # Degree = random.randint(1, numNodes)
    while (numNodes * Degree) % 2 != 0:
        Degree = random.randint(1, numNodes)
    H = nx.random_regular_graph(Degree, numNodes, seed=None)
    G = H.to_directed()
    print(nx.info(G))

    triads = nx.triadic_census(G)
    print("Triad: Occurrences")
    for i in triads:
        if triads[i] != 0 and i != '003' and i != '012' and i != '102':
            print(i, " : ", triads[i])
    print("-------------")

    TRICODES = (1, 2, 2, 3, 2, 4, 6, 8, 2, 6, 5, 7, 3, 8, 7, 11, 2, 6, 4, 8, 5,
                9, 9, 13, 6, 10, 9, 14, 7, 14, 12, 15, 2, 5, 6, 7, 6, 9, 10, 14,
                4, 9, 9, 12, 8, 13, 14, 15, 3, 7, 8, 11, 7, 12, 14, 15, 8, 14,
                13, 15, 11, 15, 15, 16)
    #: important: the order corresponds to the tricodes given in :data:`TRICODES`.
    TRIAD_NAMES = ('003', '012', '102', '021D', '021U', '021C', '111D', '111U',
                   '030T', '030C', '201', '120D', '120U', '120C', '210', '300')
    #: A dictionary mapping triad code to triad name.
    TRICODE_TO_NAME = {i: TRIAD_NAMES[code - 1] for i, code in enumerate(TRICODES)}
    # ---------------------------------------------------------------------- #

    trianglesList = []
    jsonList = []
    if os.path.exists('randomTriads.json'):
        os.remove('randomTriads.json')
    for triangle in getting_Triangles(G):
        trianglesList.append(triangle)
    for triangle in trianglesList:
        triangle_code = TRICODE_TO_NAME[tricode(G, triangle[0], triangle[1],
                                                triangle[2])]
        jsonList.append({
            'x': int(triangle[0]),
            'y': int(triangle[1]),
            'z': int(triangle[2]),
            'id': triangle_code,
            'connections': [int(triangle[0]), int(triangle[1]), int(triangle[2])]
        })
    with open('randomTriads.json', 'w') as json_file:
        json.dump(jsonList, json_file)
    return G
    '201': [], '120D': [], '120U': [], '120C': [], '210': [], '300': [],
    'null_dyad': [], 'single_dyad': [], 'mutual_dyad': []
}

for tag in tags:
    (g1, nodes_has_info) = get_guarantee_network(tag)
    ###### generate 10000 directed configuration models ######
    tmp_dic = nx.triadic_census(g1)
    for key, value in tmp_dic.items():
        glb_dic[key].append(value)
    recip_ratio = nx.overall_reciprocity(g1)
    total_nodes = g1.number_of_nodes()
    total_edges = g1.number_of_edges()
    double_link = recip_ratio * total_edges
    single_dyad = total_edges - double_link
    glb_dic['single_dyad'].append(single_dyad)
    mutual_dyad = double_link / 2
    glb_dic['mutual_dyad'].append(mutual_dyad)
    null_dyad = total_nodes * (total_nodes - 1) / 2 - single_dyad - mutual_dyad
    glb_dic['null_dyad'].append(null_dyad)
# set alpha value for each edge
# for i in range(M):
#     edges[i].set_alpha(edge_alphas[i])
# pc = mpl.collections.PatchCollection(edges, cmap=plt.cm.Blues)
# pc.set_array(edge_colors)
# plt.colorbar(pc)

textstr = "Number of edges: {}".format(M)
ax = plt.gca()
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
ax.text(0.05, 0.95, textstr, transform=ax.transAxes, fontsize=14,
        verticalalignment='top', bbox=props)

# annotate the plot with the triad census, one "type: count" line per triad
triad_trends = ''
triad_stat = nx.triadic_census(G)
for k, v in triad_stat.items():
    triad_trends += k + ": " + str(v) + "\n"
ax.text(0, 0.85, triad_trends, transform=ax.transAxes, fontsize=14,
        verticalalignment='top', bbox=props)

# inn = nx.in_degree_centrality(G)
# out = nx.out_degree_centrality(G)
# with open('centrality.csv', mode='w') as cen:
#     header_writer = csv.writer(cen, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
#     header_writer.writerow(['', 'In-degree-centrality', 'Out-degree-centrality'])
#     stat_writer = csv.writer(cen, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
#     for k in inn:
#         stat_writer.writerow([k, inn[k], out[k]])

ax.set_axis_off()
plt.show()
def gettriadiccensus(self):
    return nx.triadic_census(self.G)
def printStatistics():
    # Mention, retweet and reply triad census per group, printed as Markdown tables
    print()
    for i in range(ClusteringTweet.tweetDataFrame.groupby('group').nunique().shape[0]):
        tabletitleString = "|Group Number|Interaction Type|"
        tabletitleBottom = "|:------------:|:------------:|"
        # header columns: the 16 triad type codes
        for triadic_type in nx.triadic_census(
                UserInteractionGraph.nxGraphSeperatedByGroup[1]["mention"]).keys():
            tabletitleString += triadic_type + "|"
            tabletitleBottom += ":-----------------:|"
        print(tabletitleString)
        print(tabletitleBottom)

        for interaction in ("mention", "retweet", "reply"):
            tabletitlerow = "|" + str(i) + "|" + interaction + "|"
            triadic_census = nx.triadic_census(
                UserInteractionGraph.nxGraphSeperatedByGroup[i][interaction]).values()
            for value in triadic_census:
                tabletitlerow += str(value) + "|"
            print(tabletitlerow)
        print()

        # strong ties (edge weight > 1) vs weak ties (edge weight == 1)
        tabletitleString = "|Group Number|Interaction Type|Strong Tie|Weak Tie|"
        tabletitleBottom = "|:------------:|:------------:|:------------:|:------------:|"
        print(tabletitleString)
        print(tabletitleBottom)
        for interaction in ("mention", "retweet", "reply"):
            edge_dict = UserInteractionGraph.edgesByGroup[i][interaction]
            edge_np = np.array(list(edge_dict.values()))
            print("|" + str(i) + "|" + interaction + "|" +
                  str(np.count_nonzero(edge_np > 1)) + "|" +
                  str(np.count_nonzero(edge_np == 1)) + "|")
        print()
    print()
# triadic census of the dominance network represented as a digraph;
# individuals are the nodes, and edges their dominance relationships
triad_cfg = {
    '003': 'Null',
    '012': 'Single-edge',
    '021C': 'Pass-along',
    '021D': 'Double-dominant',
    '021U': 'Double-subordinate',
    '030C': 'Cycle',
    '030T': 'Transitive'
}
net_G = nx.from_numpy_matrix(dom_mat, create_using=nx.DiGraph)
census = nx.triadic_census(net_G)
sp = triadSignificanceProfile(net_G, triad_cfg)
# with open('requirements.txt', 'a') as f:
#     f.write('%s\n' % sp)

f_census = {}
f_census['group-size'] = [N_IND]
f_census['flee-dist'] = [params['female.FleeDist']]
f_census['aggr-intensity'] = [('mild' if params['Rating.Dom.female.Intensity'] == 0.1
                               else 'fierce')]
f_census['steepness'] = round(steep, 4)
print('\nNetwork Triadic Census:')
for k, v in sorted(census.items()):
    if k in triad_cfg:
        f_census[triad_cfg[k]] = [v]
def runTriads(graph):
    TRICODES = (1, 2, 2, 3, 2, 4, 6, 8, 2, 6, 5, 7, 3, 8, 7, 11, 2, 6, 4, 8, 5,
                9, 9, 13, 6, 10, 9, 14, 7, 14, 12, 15, 2, 5, 6, 7, 6, 9, 10, 14,
                4, 9, 9, 12, 8, 13, 14, 15, 3, 7, 8, 11, 7, 12, 14, 15, 8, 14,
                13, 15, 11, 15, 15, 16)
    #: important: the order corresponds to the tricodes given in :data:`TRICODES`.
    TRIAD_NAMES = ('003', '012', '102', '021D', '021U', '021C', '111D', '111U',
                   '030T', '030C', '201', '120D', '120U', '120C', '210', '300')
    #: A dictionary mapping triad code to triad name.
    TRICODE_TO_NAME = {i: TRIAD_NAMES[code - 1] for i, code in enumerate(TRICODES)}
    # ---------------------------------------------------------------------- #

    G = nx.to_directed(graph)
    print(nx.info(G))
    zScores = []
    deltaValues = []
    triadList = []
    nodeCount = nx.number_of_nodes(G)
    edgeCount = nx.number_of_edges(G)
    triads = nx.triadic_census(G)
    print("Triad: Occurrences")
    triadTotal = 0
    for i in triads:
        if triads[i] != 0 and i != '003' and i != '012' and i != '102':
            print(i, " : ", triads[i])
            triadList.append(i)
            triadTotal += triads[i]

    # random baseline graph with roughly the same average degree
    # (to be refined: a subgraph of all nodes of a specific triad)
    rand_graph = networkRandom(nodeCount, edgeCount // nodeCount)
    rand_triads = nx.triadic_census(rand_graph)
    rand_total = 0
    subgraph_nodes = []
    newRandGraph = None
    trianglesList = []
    for i in rand_triads:
        rand_total += 1
    for triangle in getting_Triangles(G):
        trianglesList.append(triangle)

    for i in triads:
        if triads[i] != 0 and i != '003' and i != '012' and i != '102':
            for j in rand_triads:
                if i == j:
                    for triangle in trianglesList:
                        triangleCode = TRICODE_TO_NAME[tricode(G, triangle[0],
                                                               triangle[1],
                                                               triangle[2])]
                        if triangleCode == i:
                            subgraph_nodes.append(int(triangle[0]))
                            subgraph_nodes.append(int(triangle[1]))
                            subgraph_nodes.append(int(triangle[2]))
                    subgraph_nodes = set(subgraph_nodes)
                    newRandGraph = rand_graph.subgraph(subgraph_nodes)
                    zScores.append(calculateStatSignificance(
                        triads[i], rand_triads[j], newRandGraph,
                        triadTotal, rand_total))
                    deltaValues.append(calculateDeltaValues(
                        triads[i], rand_triads[j], triadTotal, rand_total))
                    subgraph_nodes = []

    sigProfile = calculateSignificanceProfile(zScores)
    subgraphRatio = subgraphRatioProfile(deltaValues)
    print(sigProfile)
    print(subgraphRatio)
    print("-------------")
    print(triadList)

    trianglesList = []
    jsonList = []
    if os.path.exists('triads.json'):
        os.remove('triads.json')
    for triangle in getting_Triangles(G):
        trianglesList.append(triangle)
    for triangle in trianglesList:
        triangleCode = TRICODE_TO_NAME[tricode(G, triangle[0], triangle[1],
                                               triangle[2])]
        jsonList.append({
            'x': int(triangle[0]),
            'y': int(triangle[1]),
            'z': int(triangle[2]),
            'id': triangleCode,
            'connections': [int(triangle[0]), int(triangle[1]), int(triangle[2])]
        })

    statList = []
    triadListIndex = 0
    for i in range(0, len(TRIAD_NAMES)):
        if TRIAD_NAMES[i] in triadList:
            statList.append([TRIAD_NAMES[i], sigProfile[triadListIndex]])
            triadListIndex += 1
        else:
            statList.append([TRIAD_NAMES[i], 0])
    return jsonList, statList
def getTriads(graph):
    # drop the disconnected triad types (003, 012, 102), keeping the 13 connected ones
    triads_16 = nx.triadic_census(graph)
    triads_13 = [(x, y) for x, y in triads_16.items()
                 if x != '003' and x != '012' and x != '102']
    return triads_13
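Usage sketch for getTriads: the three dropped types (003 is empty, 012 a single edge, 102 a single mutual edge) are the ones that do not connect all three nodes, so the result lists the 13 connected triad classes as (name, count) pairs. The graph here is illustrative:

import networkx as nx

G = nx.gnp_random_graph(12, 0.3, directed=True, seed=7)
for name, count in getTriads(G):
    print(name, count)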
# authorName = 'Jian Pei'
# authorName = 'Hui Xiong'
authorName = 'Geoffrey E. Hinton'
# authorName = 'Yoshua Bengio'
# authorName = 'Ilya Sutskever'
# authorName = 'Michael I. Jordan'

# ################### 20180709: write data into csv ###################
g = nx.read_gml('trees/newdata/' + str(authorName) + ".gml")
triads = {}
for nodes in g.nodes():
    # NOTE: `a and b` on two iterators returns the second one, so this
    # subgraph is built from the predecessors of `nodes` only
    triads.update({
        nodes: list(nx.triadic_census(g.subgraph(
            [n for n in (g.successors(nodes) and g.predecessors(nodes))])).values())
    })

data_old = pd.read_csv('log/' + str(kinds) + '/' + str(authorName) + ".csv")
# data_old['Unnamed: 0'] = data_old['Unnamed: 0'].astype(str)
data = pd.DataFrame.from_dict(triads, orient='index')
data.index.names = ['#author']
data = pd.concat([data_old, data], axis=1, keys=['#author'])
# print(data)
# data['x1'] = data[3] + data[4] + data[5] + data[6] + data[7] + data[10]
# data['x2'] = data[9] + data[8] + data[9] + data[11] + data[12] + data[13] + data[14] + data[15]
# cols = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
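If the intended neighborhood above was the nodes that are both successors and predecessors, the `and` does not compute that; a set intersection would. This is a sketch of that alternative, not the original author's code:

import networkx as nx

g = nx.DiGraph([(1, 2), (2, 1), (1, 3), (3, 1), (2, 3)])
node = 1
# nodes reachable both from and to `node` (mutual neighbors)
mutual_neighbors = set(g.successors(node)) & set(g.predecessors(node))
print(list(nx.triadic_census(g.subgraph(mutual_neighbors)).values()))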
max_betweenness = max(graph_betweenness.items(), key=itemgetter(1))

# Plot and draw the graph. Initially draws all the nodes and edges, then adds
# the nodes of importance in a different colour. Here it plots the degree
# centrality node in orange, which is essentially the most important user in
# the graph. Saves and displays the graph so that it can be used in the report.
# Compute the layout once so both draw calls place nodes at the same positions.
pos = nx.spring_layout(biggest_sub, k=0.05)
plt.figure(figsize=(20, 20))
nx.draw(biggest_sub,
        pos=pos,
        edge_color="black",
        linewidths=0.3,
        node_size=60,
        alpha=0.6,
        with_labels=False)
nx.draw_networkx_nodes(biggest_sub,
                       pos=pos,
                       nodelist=[max_degree[0]],
                       node_size=300,
                       node_color='orange')
plt.savefig('graph.png')
plt.show()

# Question 4 - uses the graph to find out the connections that have been made.
# triadic_census returns a dictionary where the keys are the triad types and
# the values are the number of occurrences of each type.
triads = nx.triadic_census(u_i_graph.to_directed())
print(triads)

# The number of edges reflects how many times two nodes are linked, and
# therefore how many links are in the graph.
links = u_i_graph.number_of_edges()
print(links)