def __init__(self, directory, name, weight_id, aggregate_number):
    """Set up the handler: load ``<directory><name>.gexf`` as the working graph.

    Also stores the edge-weight attribute key, the aggregation parameter,
    an empty feature list, and a StatsHandler bound to this network's name.
    """
    self.directory = directory
    self.name = name
    gexf_path = directory + name + '.gexf'
    self.G = nx.read_gexf(gexf_path)
    self.weight_id = weight_id
    self.aggregate_number = aggregate_number
    self.features = []
    self.Stats = StatsHandler(name)
def __init__(self, directory, name, weight_id, aggregate_number):
    """Load ``<directory><name>.gexf`` and initialize per-network state.

    Besides the graph and bookkeeping attributes, keeps the canonical
    CSV header suffix used for every distribution's summary columns.
    """
    self.directory = directory
    self.name = name
    self.G = nx.read_gexf(directory + name + '.gexf')
    self.weight_id = weight_id
    self.aggregate_number = aggregate_number
    self.features = []
    self.Stats = StatsHandler(name)
    # header suffix appended after each distribution's average column
    self.standard_text_distribution = ',standard deviation,skewness,kurtosis,hhi,q90%,q80%,q70%,q60%,q50%,q40%,q30%,q20%,q10%,q5%,q1%'
def __init__(self, directory, name, weight_id, aggregate_number):
    """Initialize the handler for the network stored at ``<directory><name>.gexf``."""
    # identity / location of the network file
    self.name = name
    self.directory = directory
    # the graph itself, read from GEXF
    self.G = nx.read_gexf('{0}{1}.gexf'.format(directory, name))
    # analysis parameters and collected results
    self.weight_id = weight_id
    self.aggregate_number = aggregate_number
    self.features = []
    self.Stats = StatsHandler(name)
def __init__(self, directory, name, weight_id, aggregate_number):
    """Build the handler around the GEXF file ``<directory><name>.gexf``.

    Stores the weight attribute key, aggregation parameter, an empty
    feature list, a StatsHandler, and the shared CSV header suffix.
    """
    self.name = name
    self.directory = directory
    path = '{0}{1}.gexf'.format(directory, name)
    self.G = nx.read_gexf(path)
    self.weight_id = weight_id
    self.features = []
    self.aggregate_number = aggregate_number
    self.Stats = StatsHandler(name)
    # suffix of column names emitted after every distribution average
    self.standard_text_distribution = ',standard deviation,skewness,kurtosis,hhi,q90%,q80%,q70%,q60%,q50%,q40%,q30%,q20%,q10%,q5%,q1%'
class NetworkHandler:
    """Compute statistics of a weighted directed network loaded from GEXF.

    Each ``set_*`` / ``*_measures`` / ``scc_analysis`` method returns a pair
    ``[values, header]`` where ``values`` is a flat list of numbers (or the
    placeholder character 'A' for columns filled later from the stored
    regressions) and ``header`` is the matching CSV header fragment.
    Distribution details are delegated to ``self.Stats`` (a StatsHandler).

    NOTE(review): the code relies on the networkx 1.x API, where
    ``degree()`` and friends return dicts and the component helpers return
    lists of subgraphs/node lists — confirm against the pinned version.
    """

    # ------------------------------------------------------------- #
    # init (directory, name, weight_id, aggregate_number)
    # ------------------------------------------------------------- #
    def __init__(self, directory, name, weight_id, aggregate_number):
        """Load ``<directory><name>.gexf`` and initialize per-network state."""
        self.name = name
        self.directory = directory
        self.G = nx.read_gexf(self.directory + self.name + '.gexf')
        self.weight_id = weight_id
        self.features = []
        self.aggregate_number = aggregate_number
        self.Stats = StatsHandler(name)
        # header suffix appended after every distribution's average column
        self.standard_text_distribution = ',standard deviation,skewness,kurtosis,hhi,q90%,q80%,q70%,q60%,q50%,q40%,q30%,q20%,q10%,q5%,q1%'

    # ------------------------------------------------------------- #
    # set_general_values()
    # ------------------------------------------------------------- #
    def set_general_values(self):
        """Return [values, header]: node count, edge count, lenders, borrowers."""
        general_values = []
        # size
        general_values.append(len(self.G.nodes()))
        txt = ',number of nodes'
        # edges
        general_values.append(len(self.G.edges()))
        txt += ',number of edges'
        # lenders: nodes with at least one outgoing edge
        nb_lenders = sum(1 for deg in self.G.out_degree().values() if deg > 0)
        general_values.append(nb_lenders)
        txt += ',number of lenders'
        # borrowers: nodes with at least one incoming edge
        nb_borrowers = sum(1 for deg in self.G.in_degree().values() if deg > 0)
        general_values.append(nb_borrowers)
        txt += ',number of borrowers'
        return [general_values, txt]

    # ------------------------------------------------------------- #
    # set_degree_analysis
    #   distribution stats for total / in / out degree, the in-out
    #   degree correlation, assortativity and reciprocity
    # ------------------------------------------------------------- #
    def set_degree_analysis(self):
        """Return [values, header] for the degree-based statistics.

        Side effects: stores the three degree distributions on ``self``
        (used later by transversal_measures) and pushes the full
        distributions/regressions into ``self.Stats``.
        """
        degree_analysis = []
        txt = ''
        # TOTAL
        self.degree_distribution = self.G.degree()
        statistics = self.Stats.get_distribution_info(self.degree_distribution)
        # store the complete distribution for statistical analysis
        self.Stats.ks_store(self.degree_distribution, "total degree distribution")
        degree_analysis.extend(statistics[:5])
        degree_analysis.extend(statistics[5])
        txt += ',average degree' + self.standard_text_distribution
        # IN
        self.in_degree_distribution = self.G.in_degree()
        statistics = self.Stats.get_distribution_info(self.in_degree_distribution)
        self.Stats.ks_store(self.in_degree_distribution, "in degree distribution")
        degree_analysis.extend(statistics[:5])
        degree_analysis.extend(statistics[5])
        txt += ',average in degree' + self.standard_text_distribution
        # OUT
        self.out_degree_distribution = self.G.out_degree()
        statistics = self.Stats.get_distribution_info(self.out_degree_distribution)
        self.Stats.ks_store(self.out_degree_distribution, "out degree distribution")
        degree_analysis.extend(statistics[:5])
        degree_analysis.extend(statistics[5])
        txt += ',average out degree' + self.standard_text_distribution
        # CORRELATION between in- and out-degree of each node
        d_in = []
        d_out = []
        for key in self.G.nodes():
            d_in.append(self.in_degree_distribution[key])
            d_out.append(self.out_degree_distribution[key])
        self.Stats.r_square(d_in, d_out, "degree correlation")
        # 'A' is a placeholder column — presumably the fitted value is
        # written by StatsHandler in post-processing; verify against it
        degree_analysis.extend('A')
        txt += ',correlation in out degree'
        # ASSORTATIVITY: degree of edge source vs degree of edge target
        d_1 = []
        d_2 = []
        for edge in self.G.edges():
            d_1.append(self.degree_distribution[edge[0]])
            d_2.append(self.degree_distribution[edge[1]])
        self.Stats.r_square(d_1, d_2, "degree assortativity")
        degree_analysis.extend('A')
        txt += ',assortativity'
        # RECIPROCITY: correlation-style reciprocity of the adjacency
        # matrix against the density (Garlaschelli-Loffredo style)
        nodes = self.G.nodes()  # hoisted: the original re-built this list inside the O(n^2) loop
        n_nodes = len(nodes)
        density = float(len(self.G.edges())) / (n_nodes * (n_nodes - 1))
        reciprocal_value_num = 0.0
        reciprocal_value_den = 0.0
        for i in range(n_nodes):
            for j in range(n_nodes):
                if i != j:
                    a_ij = 1 if self.G.has_edge(nodes[i], nodes[j]) else 0
                    a_ji = 1 if self.G.has_edge(nodes[j], nodes[i]) else 0
                    reciprocal_value_num += (float(a_ij - density) * float(a_ji - density))
                    reciprocal_value_den += ((a_ij - density) * (a_ij - density))
        reciprocal_value = float(reciprocal_value_num) / reciprocal_value_den
        degree_analysis.extend([reciprocal_value])
        txt += ',reciprocity'
        return [degree_analysis, txt]

    # ------------------------------------------------------------- #
    # set_volume_distribution()
    # ------------------------------------------------------------- #
    def set_volume_distribution(self):
        """Return [values, header] for total/in/out node volume statistics.

        Side effects: stores the three volume distributions on ``self``
        (used later by transversal_measures and scc_analysis).

        BUGFIX: the original tested ``node in edge[0]`` / ``node in
        edge[1]``, which for string GEXF node ids is a *substring* test
        (e.g. '1' in '12' is True) and mis-attributed volume; endpoint
        matching must be equality. Also rewritten as one O(E) pass
        instead of re-scanning every edge for every node (O(N*E)).
        """
        volume_analysis = []
        txt = ''
        nodes = self.G.nodes()
        self.volume_distribution = dict.fromkeys(nodes, 0.0)
        self.in_volume_distribution = dict.fromkeys(nodes, 0.0)
        self.out_volume_distribution = dict.fromkeys(nodes, 0.0)
        for source, target, data in self.G.edges(data=True):
            weight = data[self.weight_id]
            self.out_volume_distribution[source] += weight
            self.in_volume_distribution[target] += weight
            self.volume_distribution[source] += weight
            if target != source:
                # a self-loop contributes only once to the node's total,
                # matching the original or-condition
                self.volume_distribution[target] += weight
        # TOTAL
        total_volume = sum(self.volume_distribution.values())
        volume_analysis.append(total_volume)
        statistics = self.Stats.get_distribution_info(self.volume_distribution)
        # store the complete distribution for statistical analysis
        self.Stats.ks_store(self.volume_distribution, "total volume distribution")
        volume_analysis.extend(statistics[:5])
        volume_analysis.extend(statistics[5])
        txt += ',full volume, average volume' + self.standard_text_distribution
        # IN
        total_volume_in = sum(self.in_volume_distribution.values())
        volume_analysis.append(total_volume_in)
        statistics = self.Stats.get_distribution_info(self.in_volume_distribution)
        self.Stats.ks_store(self.in_volume_distribution, "total in volume distribution")
        volume_analysis.extend(statistics[:5])
        volume_analysis.extend(statistics[5])
        txt += ',full in volume, average in volume' + self.standard_text_distribution
        # OUT
        total_volume_out = sum(self.out_volume_distribution.values())
        volume_analysis.append(total_volume_out)
        statistics = self.Stats.get_distribution_info(self.out_volume_distribution)
        self.Stats.ks_store(self.out_volume_distribution, "total out volume distribution")
        volume_analysis.extend(statistics[:5])
        volume_analysis.extend(statistics[5])
        txt += ',full out volume, average out volume' + self.standard_text_distribution
        # correlation between in- and out-volume of each node
        v_in = []
        v_out = []
        for key in nodes:
            v_in.append(self.in_volume_distribution[key])
            v_out.append(self.out_volume_distribution[key])
        self.Stats.r_square(v_in, v_out, "volume correlation")
        volume_analysis.extend('A')
        # NOTE(review): 'correlatin' typo kept byte-identical — this is a
        # CSV header downstream consumers may match on
        txt += ',correlatin in out volume'
        return [volume_analysis, txt]

    # ------------------------------------------------------------- #
    # set_clustering_distribution ()
    # ------------------------------------------------------------- #
    def set_clustering_distribution(self):
        """Return [values, header] for the unweighted clustering statistics.

        Side effects: stores both the unweighted and the weighted
        clustering distributions on ``self`` (transversal_measures needs
        both). The weighted distribution's summary columns are disabled,
        matching the original's commented-out code.
        """
        # clustering is defined on the undirected projection
        G_undirected = self.G.to_undirected()
        clustering_distributions = []
        txt = ''
        # unweighted
        self.unweighted_clustering_distribution = nx.clustering(G_undirected)
        statistics = self.Stats.get_distribution_info(self.unweighted_clustering_distribution)
        # store the complete distribution for statistical analysis
        self.Stats.ks_store(self.unweighted_clustering_distribution,
                            "unweighted clustering distribution")
        clustering_distributions.extend(statistics[:5])
        clustering_distributions.extend(statistics[5])
        # NOTE(review): 'coeficient' typo kept byte-identical (CSV header)
        txt += ',average clustering coeficient (unweighted)' + self.standard_text_distribution
        # weighted: computed and kept on self for transversal_measures
        self.weighted_clustering_distribution = nx.clustering(
            G_undirected, G_undirected.nodes(), self.weight_id)
        return [clustering_distributions, txt]

    # ------------------------------------------------------------- #
    # centrality_measures()
    # ------------------------------------------------------------- #
    def centrality_measures(self):
        """Return [values, header] for betweenness, closeness and eigenvector
        centralities.

        Eigenvector centrality can fail to converge; in that case the
        corresponding columns are padded with zeros (the percentile count
        is taken from the previous ``statistics`` result).
        """
        centrality_measures = []
        txt = ''
        # betweenness (unweighted)
        self.unweighted_betweenness_distribution = nx.betweenness_centrality(self.G)
        statistics = self.Stats.get_distribution_info(self.unweighted_betweenness_distribution)
        centrality_measures.extend(statistics[:5])
        centrality_measures.extend(statistics[5])
        txt += ',average betweenness centrality (unweighted)' + self.standard_text_distribution
        # betweenness (weighted): kept on self; its summary columns are
        # disabled, matching the original's commented-out code
        self.weighted_betweenness_distribution = nx.betweenness_centrality(
            self.G, weight=self.weight_id)
        # closeness (unweighted)
        self.unweighted_closeness_distribution = nx.closeness_centrality(self.G)
        statistics = self.Stats.get_distribution_info(self.unweighted_closeness_distribution)
        centrality_measures.extend(statistics[:5])
        centrality_measures.extend(statistics[5])
        txt += ',average closeness centrality (unweighted)' + self.standard_text_distribution
        # eigenvector — right (on G as-is)
        try:
            self.right_eigenvector_distribution = nx.eigenvector_centrality(self.G)
            statistics = self.Stats.get_distribution_info(self.right_eigenvector_distribution)
            centrality_measures.extend(statistics[:5])
            centrality_measures.extend(statistics[5])
        except Exception:  # was a bare except; power iteration may not converge
            centrality_measures.extend([0, 0, 0, 0, 0])
            centrality_measures.extend([0] * len(statistics[5]))
        txt += ',average right eigenvector' + self.standard_text_distribution
        # eigenvector — left (right eigenvector of the reversed graph)
        try:
            G_rev = self.G.reverse()
            self.lef_eigenvector_distribution = nx.eigenvector_centrality(G_rev)
            statistics = self.Stats.get_distribution_info(self.lef_eigenvector_distribution)
            centrality_measures.extend(statistics[:5])
            centrality_measures.extend(statistics[5])
        except Exception:
            centrality_measures.extend([0, 0, 0, 0, 0])
            centrality_measures.extend([0] * len(statistics[5]))
        txt += ',average left eigenvector' + self.standard_text_distribution
        return [centrality_measures, txt]

    # ------------------------------------------------------------- #
    # transversal_measures()
    #   correlations across previously-computed distributions; must run
    #   after set_degree_analysis, set_volume_distribution and
    #   set_clustering_distribution
    # ------------------------------------------------------------- #
    def transversal_measures(self):
        """Return [values, header] for the cross-distribution regressions.

        Every regression is stored in ``self.Stats``; the returned values
        are 'A' placeholders, one per regression, as in the other methods.
        """
        transversal_measures = []
        txt = ''
        # - V(k): node volume vs node degree (all / in / out)
        title = "Vol(k) all"
        degrees = []
        volumes = []
        for key in self.degree_distribution.keys():
            degrees.append(self.degree_distribution[key])
            volumes.append(self.volume_distribution[key])
        self.Stats.r_square(degrees, volumes, title)
        transversal_measures.extend('A')
        title = "Vol(k) in"
        in_degrees = []
        in_volumes = []
        for key in self.in_degree_distribution.keys():
            in_degrees.append(self.in_degree_distribution[key])
            in_volumes.append(self.in_volume_distribution[key])
        self.Stats.r_square(in_degrees, in_volumes, title)
        transversal_measures.extend('A')
        title = "Vol(k) out"
        out_degrees = []
        out_volumes = []
        for key in self.out_degree_distribution.keys():
            out_degrees.append(self.out_degree_distribution[key])
            out_volumes.append(self.out_volume_distribution[key])
        self.Stats.r_square(out_degrees, out_volumes, title)
        transversal_measures.extend('A')
        # - C(k): clustering vs degree on the undirected projection
        G_undirected = self.G.to_undirected()
        undirected_degree_distribution = G_undirected.degree()
        title = "C(k) unweighted"
        degrees = []
        unweighted_clusters = []
        for key in undirected_degree_distribution.keys():
            degrees.append(undirected_degree_distribution[key])
            unweighted_clusters.append(self.unweighted_clustering_distribution[key])
        self.Stats.r_square(degrees, unweighted_clusters, title)
        transversal_measures.extend('A')
        title = "C(k) weighted"
        degrees = []
        weighted_clusters = []
        # same node set as above; keys taken from the directed degree dict
        for key in self.degree_distribution.keys():
            degrees.append(undirected_degree_distribution[key])
            weighted_clusters.append(self.weighted_clustering_distribution[key])
        self.Stats.r_square(degrees, weighted_clusters, title)
        transversal_measures.extend('A')
        # - Vij: edge weight vs product of endpoint out/in degrees
        title = "Vij(kikj) with no aggregation"
        edges_volumes = []
        degrees = []
        for edge in self.G.edges(data=True):
            node1_degree = self.out_degree_distribution[edge[0]]
            node2_degree = self.in_degree_distribution[edge[1]]
            edges_volumes.append(edge[2][self.weight_id])
            degrees.append(node1_degree * node2_degree)
        self.Stats.r_square(degrees, edges_volumes, title)
        transversal_measures.extend('A')
        txt += ',correlation total volume degree,correlation in volume degree,correlation out volume degree,correlation unweighted cluster degree,correlation weighted cluster degree,correlation weight end degree product'
        return [transversal_measures, txt]

    # ------------------------------------------------------------- #
    # scc_analysis()
    #   weak/strong component statistics plus a bow-tie decomposition
    #   around the largest SCC; must run after set_volume_distribution
    # ------------------------------------------------------------- #
    def scc_analysis(self):
        """Return [values, header] for the connected-component analysis."""
        scc_stats = []
        txt = ''
        # --- weakly connected components -----------------------------
        wccs = nx.weakly_connected_component_subgraphs(self.G)
        scc_stats.append(len(wccs))
        txt += ',number of wccs'
        nodes_in_lwcc = nx.weakly_connected_components(self.G)[0]
        size = len(self.G.nodes())
        # share of nodes sitting in the largest WCC
        share = float(len(nodes_in_lwcc)) / size
        lwcc = wccs[0]
        avg_shortest_path_length = nx.average_shortest_path_length(lwcc)
        scc_stats.extend([share, avg_shortest_path_length])
        txt += ',LWCC - share of WCC,LWCC - shortest path length'
        n = len(nodes_in_lwcc)
        l = len(lwcc.edges())
        volume = 0.0
        for edge in lwcc.edges(data=True):
            volume += edge[2][self.weight_id]
        scc_stats.extend([n, l, volume])
        txt += ',number of nodes,number of links,total volume'
        # --- strongly connected components ---------------------------
        sccs = nx.strongly_connected_component_subgraphs(self.G)
        scc_stats.append(len(sccs))
        txt += ',number of sccs'
        # Bow-tie decomposition around the largest SCC: a non-LSCC node
        # is IN if its first incident edge points into the LSCC, OUT if
        # that edge comes from the LSCC (first qualifying edge wins).
        nodes_in_lscc = nx.strongly_connected_components(self.G)[0]
        other_nodes = list(set(self.G.nodes()) ^ set(nodes_in_lscc))
        in_nodes = []
        out_nodes = []
        edges = self.G.edges()  # hoisted: invariant across the node loop
        n_edges = len(edges)
        for node in other_nodes:
            stop = False
            i = 0
            # BUGFIX: the original bound was len(edges) - 1, so the last
            # edge of the graph was never inspected
            while not stop and i < n_edges:
                if node in edges[i]:
                    if edges[i][1] in nodes_in_lscc:
                        in_nodes.append(node)
                        stop = True
                    elif edges[i][0] in nodes_in_lscc:
                        out_nodes.append(node)
                        stop = True
                i += 1
        disconnected_nodes = list(
            set(other_nodes) ^ set(in_nodes) ^ set(out_nodes))
        size = len(self.G.nodes())
        # LSCC itself
        share = float(len(nodes_in_lscc)) / size
        lscc = sccs[0]
        avg_shortest_path_length = nx.average_shortest_path_length(lscc)
        diameter = nx.diameter(lscc)
        scc_stats.extend([share, avg_shortest_path_length, diameter])
        # NOTE(review): 'lentgh' typo kept byte-identical (CSV header)
        txt += ',LSCC - share of scc,LSCC - shortest path lentgh,LSCC - diameter'
        n = len(nodes_in_lscc)
        l = len(lscc.edges())
        # volume of edges inside the LSCC
        volume_edges = 0.0
        for edge in lscc.edges(data=True):
            volume_edges += edge[2][self.weight_id]
        # total, in and out volume of nodes inside the LSCC
        total_volume_nodes = 0.0
        in_volume_nodes = 0.0
        out_volume_nodes = 0.0
        for node in lscc.nodes():
            total_volume_nodes += self.volume_distribution[node]
            in_volume_nodes += self.in_volume_distribution[node]
            out_volume_nodes += self.out_volume_distribution[node]
        scc_stats.extend([
            n, l, volume_edges, total_volume_nodes, in_volume_nodes,
            out_volume_nodes
        ])
        txt += ',number of nodes, number of links,volume edges, total volume nodes, in volume nodes, out volume nodes'
        # IN component: links/volume counted on edges IN -> LSCC
        share = float(len(in_nodes)) / size
        n = len(in_nodes)
        n_links = 0
        volume = 0.0
        for edge in self.G.edges(data=True):
            if edge[0] in in_nodes and edge[1] in lscc:
                n_links += 1
                volume += edge[2][self.weight_id]
        total_volume_nodes = 0.0
        in_volume_nodes = 0.0
        out_volume_nodes = 0.0
        for node in in_nodes:
            total_volume_nodes += self.volume_distribution[node]
            in_volume_nodes += self.in_volume_distribution[node]
            out_volume_nodes += self.out_volume_distribution[node]
        # BUGFIX: the original reported the stale LSCC values l and
        # volume_edges here instead of the n_links/volume just computed
        scc_stats.extend([
            share, n, n_links, volume, total_volume_nodes, in_volume_nodes,
            out_volume_nodes
        ])
        txt += ',LSCC - share IN,number of nodes,number of links,volume edges,total volume nodes, in volume nodes, out volume nodes'
        # OUT component: links/volume counted on edges LSCC -> OUT
        share = float(len(out_nodes)) / size
        n = len(out_nodes)
        n_links = 0
        volume = 0.0
        for edge in self.G.edges(data=True):
            if edge[0] in lscc and edge[1] in out_nodes:
                n_links += 1
                volume += edge[2][self.weight_id]
        total_volume_nodes = 0.0
        in_volume_nodes = 0.0
        out_volume_nodes = 0.0
        for node in out_nodes:
            total_volume_nodes += self.volume_distribution[node]
            in_volume_nodes += self.in_volume_distribution[node]
            out_volume_nodes += self.out_volume_distribution[node]
        # BUGFIX: same stale-value copy-paste as the IN block above
        scc_stats.extend([
            share, n, n_links, volume, total_volume_nodes, in_volume_nodes,
            out_volume_nodes
        ])
        txt += ',LSCC - share OUT,number of nodes,number of links,volume edges,total volume nodes, in volume nodes, out volume nodes'
        # eigenvector centralities restricted to the LSCC
        # right
        try:
            self.right_eigenvector_distribution_lscc = nx.eigenvector_centrality(lscc)
            statistics = self.Stats.get_distribution_info(
                self.right_eigenvector_distribution_lscc)
            scc_stats.extend(statistics[:5])
            scc_stats.extend(statistics[5])
        except Exception:  # was a bare except; may fail to converge
            scc_stats.extend([0, 0, 0, 0, 0])
            # TODO: make the number of percentile columns variable
            scc_stats.extend([0] * 11)
        txt += ',average right eigenvector lscc' + self.standard_text_distribution
        # left
        try:
            lscc_rev = lscc.reverse()
            self.lef_eigenvector_distribution_lscc = nx.eigenvector_centrality(lscc_rev)
            statistics = self.Stats.get_distribution_info(
                self.lef_eigenvector_distribution_lscc)
            scc_stats.extend(statistics[:5])
            scc_stats.extend(statistics[5])
        except Exception:
            scc_stats.extend([0, 0, 0, 0, 0])
            scc_stats.extend([0] * 11)
        txt += ',average left eigenvector lscc' + self.standard_text_distribution
        # Katz centrality in the LSCC
        # BUGFIX: the original called nx.eigenvector_centrality here,
        # duplicating the column above instead of computing Katz; any
        # failure (including an older networkx without katz_centrality)
        # degrades to the zero padding below
        try:
            self.katz_distribution_lscc = nx.katz_centrality(lscc)
            statistics = self.Stats.get_distribution_info(self.katz_distribution_lscc)
            scc_stats.extend(statistics[:5])
            scc_stats.extend(statistics[5])
        except Exception:
            scc_stats.extend([0, 0, 0, 0, 0])
            scc_stats.extend([0] * 11)
        txt += ',average katz centrality' + self.standard_text_distribution
        return [scc_stats, txt]

    # Giving work to Matlab
    def save_extra(self):
        """Persist the stored KS distributions for external (Matlab) processing."""
        self.Stats.save_ks_s()
class NetworkHandler: # ------------------------------------------------------------- # # init (directory, name, weight_id, aggregate_number) # # ------------------------------------------------------------- def __init__(self, directory, name, weight_id, aggregate_number): self.name = name self.directory = directory self.G = nx.read_gexf(self.directory + self.name + '.gexf') self.weight_id = weight_id self.features = [] self.aggregate_number = aggregate_number self.Stats = StatsHandler(name) # ------------------------------------------------------------- # # set_general_values() # # ------------------------------------------------------------- def set_general_values(self): general_values = [] general_values.append("general values:") # size general_values.append(len(self.G.nodes())) # edges general_values.append(len(self.G.edges())) # total volume total_volume = 0.0 for edge in self.G.edges(data=True): total_volume += edge[2][self.weight_id] general_values.append(total_volume) # nb lenders nb_lenders = 0 out_deg = self.G.out_degree() for deg in out_deg.values(): if deg > 0: nb_lenders += 1 general_values.append(nb_lenders) # nb borrowers nb_borrowers = 0 in_deg = self.G.in_degree() for deg in in_deg.values(): if deg > 0: nb_borrowers += 1 general_values.append(nb_borrowers) # avg degree deg = self.G.degree() general_values.append(float(sum(deg.values())) / len(deg)) # print general_values self.features.append(general_values) # ------------------------------------------------------------- # # set_degree_distribution # computes cumulative distribution for # all - in - out # and # computes correlation between in and out # # ------------------------------------------------------------- def set_degree_analysis(self): # for KS statistical test # to verify continuous = False degree_distributions = [] degree_distributions.append("degree distribution") # total degree title = "total degree" degree_distributions.append(title) self.degree_distribution = self.G.degree() 
[degree_cumulative_distribution_agg, degree_distribution_agg_sd ] = self.Stats.analyze_distribution(self.degree_distribution, self.aggregate_number, continuous, title) # # - compute cdf for real data # degree_cumulative_distribution = self.Stats.get_cumulative_distribution(self.degree_distribution) # # - compute aggregated values # [degree_distribution_agg, # degree_distribution_agg_sd] = self.Stats.aggregate_distribution(self.degree_distribution, self.aggregate_number, True) # # - compute cdf for aggregated # degree_cumulative_distribution_agg = self.Stats.get_cumulative_distribution(degree_distribution_agg) # # - store sd from aggregated to real data degree_distributions.append("standard error") degree_distributions.append(degree_distribution_agg_sd) # - store cdf aggregated degree_distributions.append("aggregated cdf") degree_distributions.append(degree_cumulative_distribution_agg) # # - computing the Kolmogorov-Smirnov test # self.Stats.kolmogorov_smirnov(degree_cumulative_distribution[0], # degree_cumulative_distribution_agg[0], continuous) # # # in degree # degree_distributions.append("\nin degree:") # self.in_degree_distribution = self.G.in_degree() # in_degree_cumulative_distribution = self.Stats.get_cumulative_distribution(self.in_degree_distribution, 0) # in_degree_cumulative_distribution_agg = self.Stats.get_cumulative_distribution(self.in_degree_distribution, self.aggregate_number) # degree_distributions.append(in_degree_cumulative_distribution_agg) # # - computing the Kolmogorov-Smirnov test # self.Stats.kolmogorov_smirnov(in_degree_cumulative_distribution[0], # in_degree_cumulative_distribution_agg[0], continuous) # # out degree # degree_distributions.append("\nout degree:") # self.out_degree_distribution = self.G.out_degree() # out_degree_cumulative_distribution = self.Stats.get_cumulative_distribution(self.out_degree_distribution, 0) # out_degree_cumulative_distribution_agg = self.Stats.get_cumulative_distribution(self.out_degree_distribution, 
self.aggregate_number) # degree_distributions.append(out_degree_cumulative_distribution_agg) # # # - computing the Kolmogorov-Smirnov test # self.Stats.kolmogorov_smirnov(out_degree_cumulative_distribution[0], # out_degree_cumulative_distribution_agg[0], continuous) # # correlation # degree_distributions.append("\nin out correlation:") # # - dependency # keys = self.G.nodes() # d_in_d_out = [] # for key in keys: # d_in_d_out.append([self.in_degree_distribution[key],self.out_degree_distribution[key]]) # deg_in_deg_out_dependency = self.Stats.get_dependency(d_in_d_out) # # - getting the aggregate dependency # deg_in_deg_out_dependency_agg = self.Stats.aggregate_distribution(deg_in_deg_out_dependency, self.aggregate_number) # degree_distributions.append(deg_in_deg_out_dependency_agg) # # - adding the sd of the real distribution after dependency computation # degree_distributions.append(deg_in_deg_out_dependency[2]) # # - computing the Kolmogorov-Smirnov test # self.Stats.kolmogorov_smirnov(deg_in_deg_out_dependency[1], deg_in_deg_out_dependency_agg[1], continuous) # # # - r_square # self.Stats.r_square([x[0] for x in d_in_d_out],[x[1] for x in d_in_d_out]) # STORING RESULTS self.features.append(degree_distributions) # ------------------------------------------------------------- # # set_volume_distribution() # # ------------------------------------------------------------- def set_volume_distribution(self): # for KS statistical test continuous = True volume_distributions = [] volume_distributions.append("volume distribution") # total volume volume_distributions.append("\nvolume total:") self.volume_distribution = dict() for node in self.G.nodes(): volume = 0.0 for edge in self.G.edges(data=True): if node in edge[1] or node in edge[0]: volume += edge[2][self.weight_id] self.volume_distribution[node] = volume volume_cumulative_distribution = self.Stats.get_cumulative_distribution( self.volume_distribution, 0) volume_cumulative_distribution_agg = 
self.Stats.get_cumulative_distribution( self.volume_distribution, self.aggregate_number) volume_distributions.append(volume_cumulative_distribution_agg) # - computing the KS test # volume_cumulative_distribution_agg_ks = self.Stats.kolmogorov_smirnov(volume_cumulative_distribution[0], volume_cumulative_distribution_agg[0], continuous) # volume_distributions.append(volume_cumulative_distribution_agg_ks) # in volume volume_distributions.append("\nin total:") self.in_volume_distribution = dict() for node in self.G.nodes(): volume = 0.0 for edge in self.G.edges(data=True): if node in edge[1]: volume += edge[2][self.weight_id] self.in_volume_distribution[node] = volume in_volume_cumulative_distribution = self.Stats.get_cumulative_distribution( self.in_volume_distribution, 0) in_volume_cumulative_distribution_agg = self.Stats.get_cumulative_distribution( self.in_volume_distribution, self.aggregate_number) volume_distributions.append(in_volume_cumulative_distribution_agg) # - computing the KS test # in_volume_cumulative_distribution_agg_ks = self.Stats.kolmogorov_smirnov(in_volume_cumulative_distribution[0], in_volume_cumulative_distribution_agg[0], continuous) # volume_distributions.append(in_volume_cumulative_distribution_agg_ks) # out volume volume_distributions.append("\nout total:") self.out_volume_distribution = dict() for node in self.G.nodes(): volume = 0.0 for edge in self.G.edges(data=True): if node in edge[0]: volume += edge[2][self.weight_id] self.out_volume_distribution[node] = volume out_volume_cumulative_distribution = self.Stats.get_cumulative_distribution( self.out_volume_distribution, 0) out_volume_cumulative_distribution_agg = self.Stats.get_cumulative_distribution( self.out_volume_distribution, self.aggregate_number) volume_distributions.append(out_volume_cumulative_distribution_agg) # - computing the KS test # out_volume_cumulative_distribution_agg_ks = self.Stats.kolmogorov_smirnov( out_volume_cumulative_distribution[0], 
out_volume_cumulative_distribution_agg[0], continuous) # volume_distributions.append(out_volume_cumulative_distribution_agg_ks) # correlation volume_distributions.append("\nin out correlation:") # dependency keys = self.G.nodes() v_in_v_out = [] for key in keys: v_in_v_out.append([ self.in_volume_distribution[key], self.out_volume_distribution[key] ]) vol_in_vol_out_dependency = self.Stats.get_dependency(v_in_v_out) # - getting the aggregate dependency vol_in_vol_out_dependency_agg = self.Stats.aggregate_distribution( vol_in_vol_out_dependency, self.aggregate_number) volume_distributions.append(vol_in_vol_out_dependency_agg) # - adding the sd of the real distribution volume_distributions.append(vol_in_vol_out_dependency[2]) # - computing the KS test # vol_in_vol_out_ks = self.Stats.kolmogorov_smirnov(vol_in_vol_out_dependency[1], vol_in_vol_out_dependency_agg[1], continuous) # volume_distributions.append(vol_in_vol_out_ks) # - r_square # vol_in_vol_out_r = self.Stats.r_square([x[0] for x in v_in_v_out], [x[1] for x in v_in_v_out]) # volume_distributions.append(vol_in_vol_out_r) self.features.append(volume_distributions) # ------------------------------------------------------------- # # set_clustering_distribution () # # ------------------------------------------------------------- def set_clustering_distribution(self): # only indirected G_undirected = self.G.to_undirected() # for KS statistical test continuous = True clustering_distributions = [] clustering_distributions.append("clustering distribution") # unweighted clustering_distributions.append("\nunweighted:") self.unweighted_clustering_distribution = nx.clustering(G_undirected) unweighted_clustering_cumulative_distribution = self.Stats.get_cumulative_distribution( self.unweighted_clustering_distribution, 0) unweighted_clustering_cumulative_distribution_agg = self.Stats.get_cumulative_distribution( self.unweighted_clustering_distribution, self.aggregate_number) clustering_distributions.append( 
unweighted_clustering_cumulative_distribution_agg) # - computing the KS test # unweighted_clustering_cumulative_distribution_agg_ks = self.Stats.kolmogorov_smirnov( unweighted_clustering_cumulative_distribution[0], unweighted_clustering_cumulative_distribution_agg[0], continuous) # clustering_distributions.append(unweighted_clustering_cumulative_distribution_agg_ks) # adding the average value to the general values [average_unweighted_clustering, sd_unweighted_clustering ] = self.Stats.get_mean_sd(self.unweighted_clustering_distribution) self.features[0].append(average_unweighted_clustering) self.features[0].append(sd_unweighted_clustering) # weighted clustering_distributions.append("\nweighted:") self.weighted_clustering_distribution = nx.clustering( G_undirected, G_undirected.nodes(), self.weight_id) weighted_clustering_cumulative_distribution = self.Stats.get_cumulative_distribution( self.weighted_clustering_distribution, 0) weighted_clustering_cumulative_distribution_agg = self.Stats.get_cumulative_distribution( self.weighted_clustering_distribution, self.aggregate_number) clustering_distributions.append( weighted_clustering_cumulative_distribution_agg) # - computing the KS test # weighted_clustering_cumulative_distribution_agg_ks = self.Stats.kolmogorov_smirnov( weighted_clustering_cumulative_distribution[0], weighted_clustering_cumulative_distribution_agg[0], continuous) # clustering_distributions.append(weighted_clustering_cumulative_distribution_agg_ks) # adding the average value to the general values [average_weighted_clustering, sd_weighted_clustering ] = self.Stats.get_mean_sd(self.weighted_clustering_distribution) self.features[0].append(average_weighted_clustering) self.features[0].append(sd_weighted_clustering) self.features.append(clustering_distributions) # ------------------------------------------------------------- # # scc_analysis() # # ------------------------------------------------------------- def scc_analysis(self): scc_stats = [] sccs = 
nx.strongly_connected_component_subgraphs(self.G) # adding values to the general values lscc = sccs[0] avg_shortest_path_lentgh = nx.average_shortest_path_length(lscc) diameter = nx.diameter(lscc) self.features[0].append(avg_shortest_path_lentgh) self.features[0].append(diameter) # number of sccs n_scc = len(sccs) scc_stats.append(n_scc) # nodes per sccs nodes_scc = [] for subgraph in sccs: nodes_scc.append(len(subgraph.nodes())) scc_stats.append(nodes_scc) # links per sccs links_scc = [] for subgraph in sccs: links_scc.append(len(subgraph.edges())) scc_stats.append(links_scc) # volume per sccs volumes_scc = [] for subgraph in sccs: volume = 0.0 for edge in subgraph.edges(data=True): volume += edge[2][self.weight_id] volumes_scc.append(volume) scc_stats.append(volumes_scc) # Bow Tie analysis for the largest SCC nodes_in_lscc = nx.strongly_connected_components(self.G)[0] other_nodes = list(set(self.G.nodes()) ^ set(nodes_in_lscc)) in_nodes = [] out_nodes = [] for node in other_nodes: edges = self.G.edges() stop = False i = 0 while (stop == False and i < len(edges) - 1): if node in edges[i]: if edges[i][1] in nodes_in_lscc: in_nodes.append(node) stop = True else: if edges[i][0] in nodes_in_lscc: out_nodes.append(node) stop = True i += 1 disconnected_nodes = list( set(other_nodes) ^ set(in_nodes) ^ set(out_nodes)) size = len(self.G.nodes()) scc_stats.extend([ float(len(nodes_in_lscc)) / size, float(len(in_nodes)) / size, float(len(out_nodes)) / size, float(len(disconnected_nodes)) / size ]) self.features.append(scc_stats) # ------------------------------------------------------------- # # centrality_measures() # # ------------------------------------------------------------- def centrality_measures(self): centrality_measures = [] # betweenness continuous = True # unweighted unweighted_betweenness_distribution = nx.betweenness_centrality(self.G) [unweighted_betweenness_mean, unweighted_betweenness_sd ] = self.Stats.get_mean_sd(unweighted_betweenness_distribution) 
self.features[0].append(unweighted_betweenness_mean) self.features[0].append(unweighted_betweenness_sd) unweighted_betweenness_cumulative_distribution = self.Stats.get_cumulative_distribution( unweighted_betweenness_distribution, 0) unweighted_betweenness_cumulative_distribution_agg = self.Stats.get_cumulative_distribution( unweighted_betweenness_distribution, self.aggregate_number) centrality_measures.append( unweighted_betweenness_cumulative_distribution_agg) # - computing the KS test self.Stats.kolmogorov_smirnov( unweighted_betweenness_cumulative_distribution[0], unweighted_betweenness_cumulative_distribution_agg[0], continuous) # weighted weighted_betweenness_distribution = nx.betweenness_centrality( self.G, weight=self.weight_id) [weighted_betweenness_mean, weighted_betweenness_sd ] = self.Stats.get_mean_sd(weighted_betweenness_distribution) self.features[0].append(weighted_betweenness_mean) self.features[0].append(weighted_betweenness_sd) weighted_betweenness_cumulative_distribution = self.Stats.get_cumulative_distribution( weighted_betweenness_distribution, 0) weighted_betweenness_cumulative_distribution_agg = self.Stats.get_cumulative_distribution( weighted_betweenness_distribution, self.aggregate_number) centrality_measures.append( weighted_betweenness_cumulative_distribution_agg) # - computing the KS test self.Stats.kolmogorov_smirnov( weighted_betweenness_cumulative_distribution[0], weighted_betweenness_cumulative_distribution_agg[0], continuous) # eigen vector eigenvector_distribution = nx.eigenvector_centrality(self.G) [eigenvector_mean, eigenvector_sd] = self.Stats.get_mean_sd(eigenvector_distribution) self.features[0].append(eigenvector_mean) self.features[0].append(eigenvector_sd) eigenvector_cumulative_distribution = self.Stats.get_cumulative_distribution( eigenvector_distribution, 0) eigenvector_cumulative_distribution_agg = self.Stats.get_cumulative_distribution( eigenvector_distribution, self.aggregate_number) 
centrality_measures.append(eigenvector_cumulative_distribution_agg) # - computing the KS test self.Stats.kolmogorov_smirnov( eigenvector_cumulative_distribution[0], eigenvector_cumulative_distribution_agg[0], continuous) self.features.append(centrality_measures) # ------------------------------------------------------------- # # transversal_measures() # # ------------------------------------------------------------- def transversal_measures(self): transversal_measures = [] continuous = False # - V(k) # all degree_volumes = [] keys = self.degree_distribution.keys() for key in keys: degree = self.degree_distribution[key] volume = self.volume_distribution[key] degree_volumes.append([degree, volume]) V_k = self.Stats.get_dependency(degree_volumes) # - getting the aggregate dependency V_k_agg = self.Stats.aggregate_distribution(V_k, self.aggregate_number) transversal_measures.append(V_k_agg) # - adding the sd of the real distribution transversal_measures.append(V_k[2]) # storing KS and Rsquared self.Stats.kolmogorov_smirnov(V_k[1], V_k_agg[1], continuous) self.Stats.r_square([x[0] for x in degree_volumes], [x[1] for x in degree_volumes]) # in in_degree_volumes = [] keys = [] keys = self.in_degree_distribution.keys() for key in keys: in_degree = self.in_degree_distribution[key] in_volume = self.in_volume_distribution[key] in_degree_volumes.append([in_degree, in_volume]) V_k_in = self.Stats.get_dependency(in_degree_volumes) # - getting the aggregate dependency V_k_in_agg = self.Stats.aggregate_distribution(V_k_in, self.aggregate_number) transversal_measures.append(V_k_in_agg) # - adding the sd of the real distribution transversal_measures.append(V_k_in[2]) # storing KS and Rsquared self.Stats.kolmogorov_smirnov(V_k_in[1], V_k_in_agg[1], continuous) self.Stats.r_square([x[0] for x in in_degree_volumes], [x[1] for x in in_degree_volumes]) # out out_degree_volumes = [] keys = [] keys = self.out_degree_distribution.keys() for key in keys: out_degree = 
self.out_degree_distribution[key] out_volume = self.out_volume_distribution[key] out_degree_volumes.append([out_degree, out_volume]) V_k_out = self.Stats.get_dependency(out_degree_volumes) # - getting the aggregate dependency V_k_out_agg = self.Stats.aggregate_distribution( V_k_out, self.aggregate_number) transversal_measures.append(V_k_out_agg) # - adding the sd of the real distribution transversal_measures.append(V_k_out[2]) # storing KS and Rsquared self.Stats.kolmogorov_smirnov(V_k_out[1], V_k_out_agg[1], continuous) self.Stats.r_square([x[0] for x in out_degree_volumes], [x[1] for x in out_degree_volumes]) # - C(k) G_undirected = self.G.to_undirected() undirected_degree_distribution = G_undirected.degree() # unweighted cluster degree_unweighted_clusters = [] keys = undirected_degree_distribution.keys() for key in keys: degree = undirected_degree_distribution[key] unweighted_cluster = self.unweighted_clustering_distribution[key] degree_unweighted_clusters.append([degree, unweighted_cluster]) C_k_unweighted = self.Stats.get_dependency(degree_unweighted_clusters) # - getting the aggregate dependency C_k_unweighted_agg = self.Stats.aggregate_distribution( C_k_unweighted, self.aggregate_number) transversal_measures.append(C_k_unweighted_agg) # - adding the sd of the real distribution transversal_measures.append(C_k_unweighted[2]) # storing KS and Rsquared self.Stats.kolmogorov_smirnov(C_k_unweighted[1], C_k_unweighted_agg[1], continuous) self.Stats.r_square([x[0] for x in degree_unweighted_clusters], [x[1] for x in degree_unweighted_clusters]) # weighted cluster degree_weighted_clusters = [] # keys = self.degree_distribution.keys() for key in keys: degree = undirected_degree_distribution[key] weighted_cluster = self.weighted_clustering_distribution[key] degree_weighted_clusters.append([degree, weighted_cluster]) C_k_weighted = self.Stats.get_dependency(degree_weighted_clusters) # - getting the aggregate dependency C_k_weighted_agg = 
self.Stats.aggregate_distribution( C_k_weighted, self.aggregate_number) transversal_measures.append(C_k_weighted_agg) # - adding the sd of the real distribution transversal_measures.append(C_k_weighted[2]) # storing KS and Rsquared self.Stats.kolmogorov_smirnov(C_k_weighted[1], C_k_weighted_agg[1], continuous) self.Stats.r_square([x[0] for x in degree_weighted_clusters], [x[1] for x in degree_weighted_clusters]) # - Vij # average weight of links for Ki*Kj edges_volume_degree = [] for edge in self.G.edges(data=True): node1_degree = self.out_degree_distribution[edge[0]] node2_degree = self.in_degree_distribution[edge[1]] weight = edge[2][self.weight_id] edges_volume_degree.append([node1_degree * node2_degree, weight]) volume_end_point_degree = self.Stats.get_dependency( edges_volume_degree) transversal_measures.append(volume_end_point_degree) # - Knn # unweighted # undirected average_neighbor_degrees = nx.average_neighbor_degree(self.G) average_neighbor_degree_k = [] for key in keys: degree = undirected_degree_distribution[key] average_neighbor_degree = average_neighbor_degrees[key] average_neighbor_degree_k.append([degree, average_neighbor_degree]) average_neighbor_degree_k_dep = self.Stats.get_dependency( average_neighbor_degree_k) # adding to the general values [average_neighbor_degree_mean, average_neighbor_degree_sd ] = self.Stats.get_mean_sd(average_neighbor_degrees) self.features[0].append(average_neighbor_degree_mean) self.features[0].append(average_neighbor_degree_sd) # - getting the aggregate dependency average_neighbor_degree_k_agg = self.Stats.aggregate_distribution( average_neighbor_degree_k_dep, self.aggregate_number) transversal_measures.append(average_neighbor_degree_k_agg) # - adding the sd of the real distribution transversal_measures.append(average_neighbor_degree_k_dep[2]) # - computing the KS and R square test self.Stats.kolmogorov_smirnov(average_neighbor_degree_k_dep[1], average_neighbor_degree_k_agg[1], continuous) self.Stats.r_square([x[0] 
for x in average_neighbor_degree_k], [x[1] for x in average_neighbor_degree_k]) # weighted # undirected average_neighbor_degrees_weighted = nx.average_neighbor_degree( self.G, weight=self.weight_id) average_neighbor_degree_weighted_k = [] for key in keys: degree = undirected_degree_distribution[key] average_neighbor_degree_weighted = average_neighbor_degrees_weighted[ key] average_neighbor_degree_weighted_k.append( [degree, average_neighbor_degree_weighted]) average_neighbor_degree_weighted_k_dep = self.Stats.get_dependency( average_neighbor_degree_weighted_k) # adding to the general values [ average_neighbor_degree_weighted_mean, average_neighbor_degree_weighted_sd ] = self.Stats.get_mean_sd(average_neighbor_degrees_weighted) self.features[0].append(average_neighbor_degree_weighted_mean) self.features[0].append(average_neighbor_degree_weighted_sd) # - getting the aggregate dependency average_neighbor_degree_weighted_k_agg = self.Stats.aggregate_distribution( average_neighbor_degree_weighted_k_dep, self.aggregate_number) transversal_measures.append(average_neighbor_degree_weighted_k_agg) # - adding the sd of the real distribution transversal_measures.append(average_neighbor_degree_weighted_k_dep[2]) # - computing the KS and R square test self.Stats.kolmogorov_smirnov( average_neighbor_degree_weighted_k_dep[1], average_neighbor_degree_weighted_k_agg[1], continuous) self.Stats.r_square([x[0] for x in average_neighbor_degree_weighted_k], [x[1] for x in average_neighbor_degree_weighted_k]) self.features.append(transversal_measures) # Giving work to Matlab def save_extra(self): self.Stats.save_ks_s()
class NetworkHandler:
    """Loads a .gexf network and extracts statistics via a StatsHandler.

    Call order matters: set_general_values() must run first (it creates
    self.features[0], which later methods extend with averages), then
    set_degree_analysis(), set_volume_distribution() and
    set_clustering_distribution() before transversal_measures().
    """

    # -------------------------------------------------------------
    #
    # init (directory, name, weight_id, aggregate_number)
    #
    # -------------------------------------------------------------
    def __init__(self, directory, name, weight_id, aggregate_number):
        self.name = name
        self.directory = directory
        # graph is loaded eagerly from <directory><name>.gexf
        self.G = nx.read_gexf(self.directory + self.name + '.gexf')
        # edge-attribute key holding the weight of each link
        self.weight_id = weight_id
        # accumulates the result lists produced by each analysis method
        self.features = []
        self.aggregate_number = aggregate_number
        self.Stats = StatsHandler(name)

    # -------------------------------------------------------------
    #
    # set_general_values()
    #
    # -------------------------------------------------------------
    def set_general_values(self):
        """Collect global network statistics into self.features[0]."""
        general_values = []
        general_values.append("general values:")
        # size
        general_values.append(len(self.G.nodes()))
        # edges
        general_values.append(len(self.G.edges()))
        # total volume
        total_volume = 0.0
        for edge in self.G.edges(data=True):
            total_volume += edge[2][self.weight_id]
        general_values.append(total_volume)
        # nb lenders (nodes with at least one outgoing edge)
        nb_lenders = 0
        out_deg = self.G.out_degree()
        for deg in out_deg.values():
            if deg > 0:
                nb_lenders += 1
        general_values.append(nb_lenders)
        # nb borrowers (nodes with at least one incoming edge)
        nb_borrowers = 0
        in_deg = self.G.in_degree()
        for deg in in_deg.values():
            if deg > 0:
                nb_borrowers += 1
        general_values.append(nb_borrowers)
        # avg degree; guard against division by zero on an empty graph
        deg = self.G.degree()
        general_values.append(
            float(sum(deg.values())) / len(deg) if deg else 0.0)
        self.features.append(general_values)

    # -------------------------------------------------------------
    #
    # set_degree_analysis()
    # computes cumulative distribution for the total degree;
    # the in / out / correlation analyses were disabled upstream.
    #
    # -------------------------------------------------------------
    def set_degree_analysis(self):
        """Total-degree distribution analysis.

        Caches self.degree_distribution, self.in_degree_distribution and
        self.out_degree_distribution for the later methods that need them.
        """
        # for KS statistical test -- to verify
        continuous = False
        degree_distributions = []
        degree_distributions.append("degree distribution")
        # total degree
        title = "total degree"
        degree_distributions.append(title)
        self.degree_distribution = self.G.degree()
        [degree_cumulative_distribution_agg,
         degree_distribution_agg_sd] = self.Stats.analyze_distribution(
             self.degree_distribution, self.aggregate_number, continuous,
             title)
        # - store sd from aggregated to real data
        degree_distributions.append("standard error")
        degree_distributions.append(degree_distribution_agg_sd)
        # - store cdf aggregated
        degree_distributions.append("aggregated cdf")
        degree_distributions.append(degree_cumulative_distribution_agg)
        # BUGFIX: these assignments were commented out together with the
        # disabled in/out analysis, but transversal_measures() reads both
        # attributes -- without them it raised AttributeError.
        self.in_degree_distribution = self.G.in_degree()
        self.out_degree_distribution = self.G.out_degree()
        # NOTE: the dedicated in-degree / out-degree cdf analyses and the
        # in-out degree correlation were disabled in the original version
        # and remain disabled here.
        # STORING RESULTS
        self.features.append(degree_distributions)

    # -------------------------------------------------------------
    #
    # set_volume_distribution()
    #
    # -------------------------------------------------------------
    def set_volume_distribution(self):
        """Per-node volume (total / in / out) distributions and correlation.

        Caches self.volume_distribution, self.in_volume_distribution and
        self.out_volume_distribution for transversal_measures().
        """
        # for KS statistical test
        continuous = True
        volume_distributions = []
        volume_distributions.append("volume distribution")
        # Accumulate the three per-node volume maps in a single pass over
        # the edges.
        # BUGFIX: the original tested "node in edge[1]" / "node in edge[0]",
        # which is a *substring* match on string node ids read from gexf
        # (e.g. '1' in '12' is True), and scanned every edge once per node
        # (O(nodes * edges)); endpoint identity is what is meant here.
        self.volume_distribution = dict()
        self.in_volume_distribution = dict()
        self.out_volume_distribution = dict()
        for node in self.G.nodes():
            self.volume_distribution[node] = 0.0
            self.in_volume_distribution[node] = 0.0
            self.out_volume_distribution[node] = 0.0
        for edge in self.G.edges(data=True):
            weight = edge[2][self.weight_id]
            self.out_volume_distribution[edge[0]] += weight
            self.in_volume_distribution[edge[1]] += weight
            self.volume_distribution[edge[0]] += weight
            # a self-loop must contribute only once to the total volume
            # (the original "or" condition counted it once as well)
            if edge[0] != edge[1]:
                self.volume_distribution[edge[1]] += weight
        # total volume
        volume_distributions.append("\nvolume total:")
        volume_cumulative_distribution = self.Stats.get_cumulative_distribution(
            self.volume_distribution, 0)
        volume_cumulative_distribution_agg = self.Stats.get_cumulative_distribution(
            self.volume_distribution, self.aggregate_number)
        volume_distributions.append(volume_cumulative_distribution_agg)
        # - KS test kept disabled, as in the original:
        # volume_cumulative_distribution_agg_ks = self.Stats.kolmogorov_smirnov(
        #     volume_cumulative_distribution[0],
        #     volume_cumulative_distribution_agg[0], continuous)
        # volume_distributions.append(volume_cumulative_distribution_agg_ks)
        # in volume
        volume_distributions.append("\nin total:")
        in_volume_cumulative_distribution = self.Stats.get_cumulative_distribution(
            self.in_volume_distribution, 0)
        in_volume_cumulative_distribution_agg = self.Stats.get_cumulative_distribution(
            self.in_volume_distribution, self.aggregate_number)
        volume_distributions.append(in_volume_cumulative_distribution_agg)
        # - KS test kept disabled, as in the original (see above)
        # out volume
        volume_distributions.append("\nout total:")
        out_volume_cumulative_distribution = self.Stats.get_cumulative_distribution(
            self.out_volume_distribution, 0)
        out_volume_cumulative_distribution_agg = self.Stats.get_cumulative_distribution(
            self.out_volume_distribution, self.aggregate_number)
        volume_distributions.append(out_volume_cumulative_distribution_agg)
        # - KS test kept disabled, as in the original (see above)
        # correlation between per-node in-volume and out-volume
        volume_distributions.append("\nin out correlation:")
        # dependency
        v_in_v_out = []
        for key in self.G.nodes():
            v_in_v_out.append([
                self.in_volume_distribution[key],
                self.out_volume_distribution[key]
            ])
        vol_in_vol_out_dependency = self.Stats.get_dependency(v_in_v_out)
        # - getting the aggregate dependency
        vol_in_vol_out_dependency_agg = self.Stats.aggregate_distribution(
            vol_in_vol_out_dependency, self.aggregate_number)
        volume_distributions.append(vol_in_vol_out_dependency_agg)
        # - adding the sd of the real distribution
        volume_distributions.append(vol_in_vol_out_dependency[2])
        # - KS test and r-square kept disabled, as in the original
        self.features.append(volume_distributions)

    # -------------------------------------------------------------
    #
    # set_clustering_distribution ()
    #
    # -------------------------------------------------------------
    def set_clustering_distribution(self):
        """Compute unweighted and weighted clustering distributions.

        Works on the undirected projection of self.G.  Appends the
        aggregated cumulative distributions to self.features and pushes
        mean/sd of each distribution onto the general values
        (self.features[0]).
        """
        # clustering is only defined on the undirected projection
        G_undirected = self.G.to_undirected()
        # for KS statistical test
        continuous = True
        clustering_distributions = []
        clustering_distributions.append("clustering distribution")
        # unweighted
        clustering_distributions.append("\nunweighted:")
        self.unweighted_clustering_distribution = nx.clustering(G_undirected)
        unweighted_clustering_cumulative_distribution = self.Stats.get_cumulative_distribution(
            self.unweighted_clustering_distribution, 0)
        unweighted_clustering_cumulative_distribution_agg = self.Stats.get_cumulative_distribution(
            self.unweighted_clustering_distribution, self.aggregate_number)
        clustering_distributions.append(
            unweighted_clustering_cumulative_distribution_agg)
        # - KS test kept disabled, as in the original:
        # unweighted_clustering_cumulative_distribution_agg_ks = self.Stats.kolmogorov_smirnov(
        #     unweighted_clustering_cumulative_distribution[0],
        #     unweighted_clustering_cumulative_distribution_agg[0], continuous)
        # clustering_distributions.append(unweighted_clustering_cumulative_distribution_agg_ks)
        # adding the average value to the general values
        [average_unweighted_clustering, sd_unweighted_clustering
         ] = self.Stats.get_mean_sd(self.unweighted_clustering_distribution)
        self.features[0].append(average_unweighted_clustering)
        self.features[0].append(sd_unweighted_clustering)
        # weighted
        clustering_distributions.append("\nweighted:")
        self.weighted_clustering_distribution = nx.clustering(
            G_undirected, G_undirected.nodes(), self.weight_id)
        weighted_clustering_cumulative_distribution = self.Stats.get_cumulative_distribution(
            self.weighted_clustering_distribution, 0)
        weighted_clustering_cumulative_distribution_agg = self.Stats.get_cumulative_distribution(
            self.weighted_clustering_distribution, self.aggregate_number)
        clustering_distributions.append(
            weighted_clustering_cumulative_distribution_agg)
        # - KS test kept disabled, as in the original (see above)
        # adding the average value to the general values
        [average_weighted_clustering, sd_weighted_clustering
         ] = self.Stats.get_mean_sd(self.weighted_clustering_distribution)
        self.features[0].append(average_weighted_clustering)
        self.features[0].append(sd_weighted_clustering)
        self.features.append(clustering_distributions)

    # -------------------------------------------------------------
    #
    # scc_analysis()
    #
    # -------------------------------------------------------------
    def scc_analysis(self):
        """Strongly-connected-component statistics and bow-tie decomposition.

        Appends per-SCC node/link/volume counts and the bow-tie shares
        (LSCC / IN / OUT / disconnected, as fractions of all nodes) to
        self.features; LSCC average shortest path and diameter go to the
        general values.  Relies on networkx 1.x behaviour where the SCC
        helpers return lists sorted by decreasing component size.
        """
        scc_stats = []
        sccs = nx.strongly_connected_component_subgraphs(self.G)
        # adding values to the general values
        lscc = sccs[0]  # largest SCC first (nx 1.x ordering)
        avg_shortest_path_lentgh = nx.average_shortest_path_length(lscc)
        diameter = nx.diameter(lscc)
        self.features[0].append(avg_shortest_path_lentgh)
        self.features[0].append(diameter)
        # number of sccs
        n_scc = len(sccs)
        scc_stats.append(n_scc)
        # nodes per sccs
        nodes_scc = []
        for subgraph in sccs:
            nodes_scc.append(len(subgraph.nodes()))
        scc_stats.append(nodes_scc)
        # links per sccs
        links_scc = []
        for subgraph in sccs:
            links_scc.append(len(subgraph.edges()))
        scc_stats.append(links_scc)
        # volume per sccs
        volumes_scc = []
        for subgraph in sccs:
            volume = 0.0
            for edge in subgraph.edges(data=True):
                volume += edge[2][self.weight_id]
            volumes_scc.append(volume)
        scc_stats.append(volumes_scc)
        # Bow Tie analysis for the largest SCC
        nodes_in_lscc = set(nx.strongly_connected_components(self.G)[0])
        other_nodes = list(set(self.G.nodes()) - nodes_in_lscc)
        in_nodes = []
        out_nodes = []
        # hoisted out of the per-node loop: the edge list is invariant
        edges = self.G.edges()
        for node in other_nodes:
            stop = False
            i = 0
            # BUGFIX: the original bound was "i < len(edges) - 1", which
            # silently skipped the last edge of the graph.
            while not stop and i < len(edges):
                if node in edges[i]:
                    if edges[i][1] in nodes_in_lscc:
                        # node -> LSCC: node belongs to the IN component
                        in_nodes.append(node)
                        stop = True
                    else:
                        if edges[i][0] in nodes_in_lscc:
                            # LSCC -> node: node belongs to the OUT component
                            out_nodes.append(node)
                            stop = True
                i += 1
        # everything neither IN nor OUT (nor in the LSCC) is disconnected
        disconnected_nodes = list(
            set(other_nodes) - set(in_nodes) - set(out_nodes))
        size = len(self.G.nodes())
        scc_stats.extend([
            float(len(nodes_in_lscc)) / size,
            float(len(in_nodes)) / size,
            float(len(out_nodes)) / size,
            float(len(disconnected_nodes)) / size
        ])
        self.features.append(scc_stats)

    # -------------------------------------------------------------
    #
    # centrality_measures()
    #
    # -------------------------------------------------------------
    def centrality_measures(self):
        """Betweenness (unweighted/weighted) and eigenvector centrality.

        Mean/sd of each distribution go to the general values
        (self.features[0]); the aggregated cumulative distributions are
        appended to self.features.  KS results are stored inside self.Stats.
        """
        centrality_measures = []
        # betweenness
        continuous = True
        # unweighted
        unweighted_betweenness_distribution = nx.betweenness_centrality(self.G)
        [unweighted_betweenness_mean, unweighted_betweenness_sd
         ] = self.Stats.get_mean_sd(unweighted_betweenness_distribution)
        self.features[0].append(unweighted_betweenness_mean)
        self.features[0].append(unweighted_betweenness_sd)
        unweighted_betweenness_cumulative_distribution = self.Stats.get_cumulative_distribution(
            unweighted_betweenness_distribution, 0)
        unweighted_betweenness_cumulative_distribution_agg = self.Stats.get_cumulative_distribution(
            unweighted_betweenness_distribution, self.aggregate_number)
        centrality_measures.append(
            unweighted_betweenness_cumulative_distribution_agg)
        # - computing the KS test
        self.Stats.kolmogorov_smirnov(
            unweighted_betweenness_cumulative_distribution[0],
            unweighted_betweenness_cumulative_distribution_agg[0], continuous)
        # weighted
        weighted_betweenness_distribution = nx.betweenness_centrality(
            self.G, weight=self.weight_id)
        [weighted_betweenness_mean, weighted_betweenness_sd
         ] = self.Stats.get_mean_sd(weighted_betweenness_distribution)
        self.features[0].append(weighted_betweenness_mean)
        self.features[0].append(weighted_betweenness_sd)
        weighted_betweenness_cumulative_distribution = self.Stats.get_cumulative_distribution(
            weighted_betweenness_distribution, 0)
        weighted_betweenness_cumulative_distribution_agg = self.Stats.get_cumulative_distribution(
            weighted_betweenness_distribution, self.aggregate_number)
        centrality_measures.append(
            weighted_betweenness_cumulative_distribution_agg)
        # - computing the KS test
        self.Stats.kolmogorov_smirnov(
            weighted_betweenness_cumulative_distribution[0],
            weighted_betweenness_cumulative_distribution_agg[0], continuous)
        # eigen vector
        eigenvector_distribution = nx.eigenvector_centrality(self.G)
        [eigenvector_mean,
         eigenvector_sd] = self.Stats.get_mean_sd(eigenvector_distribution)
        self.features[0].append(eigenvector_mean)
        self.features[0].append(eigenvector_sd)
        eigenvector_cumulative_distribution = self.Stats.get_cumulative_distribution(
            eigenvector_distribution, 0)
        eigenvector_cumulative_distribution_agg = self.Stats.get_cumulative_distribution(
            eigenvector_distribution, self.aggregate_number)
        centrality_measures.append(eigenvector_cumulative_distribution_agg)
        # - computing the KS test
        self.Stats.kolmogorov_smirnov(eigenvector_cumulative_distribution[0],
                                      eigenvector_cumulative_distribution_agg[0],
                                      continuous)
        self.features.append(centrality_measures)

    # -------------------------------------------------------------
    #
    # transversal_measures()
    #
    # -------------------------------------------------------------
    def transversal_measures(self):
        """Cross-measure dependencies: V(k), C(k), V_ij and Knn.

        Requires set_degree_analysis(), set_volume_distribution() and
        set_clustering_distribution() to have run first (this method
        reads the distributions they cache on self).
        """
        transversal_measures = []
        continuous = False
        # - V(k): node volume as a function of degree
        # all
        degree_volumes = []
        keys = self.degree_distribution.keys()
        for key in keys:
            degree = self.degree_distribution[key]
            volume = self.volume_distribution[key]
            degree_volumes.append([degree, volume])
        V_k = self.Stats.get_dependency(degree_volumes)
        # - getting the aggregate dependency
        V_k_agg = self.Stats.aggregate_distribution(V_k, self.aggregate_number)
        transversal_measures.append(V_k_agg)
        # - adding the sd of the real distribution
        transversal_measures.append(V_k[2])
        # storing KS and Rsquared
        self.Stats.kolmogorov_smirnov(V_k[1], V_k_agg[1], continuous)
        self.Stats.r_square([x[0] for x in degree_volumes],
                            [x[1] for x in degree_volumes])
        # in
        in_degree_volumes = []
        keys = self.in_degree_distribution.keys()
        for key in keys:
            in_degree = self.in_degree_distribution[key]
            in_volume = self.in_volume_distribution[key]
            in_degree_volumes.append([in_degree, in_volume])
        V_k_in = self.Stats.get_dependency(in_degree_volumes)
        V_k_in_agg = self.Stats.aggregate_distribution(V_k_in,
                                                       self.aggregate_number)
        transversal_measures.append(V_k_in_agg)
        transversal_measures.append(V_k_in[2])
        self.Stats.kolmogorov_smirnov(V_k_in[1], V_k_in_agg[1], continuous)
        self.Stats.r_square([x[0] for x in in_degree_volumes],
                            [x[1] for x in in_degree_volumes])
        # out
        out_degree_volumes = []
        keys = self.out_degree_distribution.keys()
        for key in keys:
            out_degree = self.out_degree_distribution[key]
            out_volume = self.out_volume_distribution[key]
            out_degree_volumes.append([out_degree, out_volume])
        V_k_out = self.Stats.get_dependency(out_degree_volumes)
        V_k_out_agg = self.Stats.aggregate_distribution(
            V_k_out, self.aggregate_number)
        transversal_measures.append(V_k_out_agg)
        transversal_measures.append(V_k_out[2])
        self.Stats.kolmogorov_smirnov(V_k_out[1], V_k_out_agg[1], continuous)
        self.Stats.r_square([x[0] for x in out_degree_volumes],
                            [x[1] for x in out_degree_volumes])
        # - C(k): clustering as a function of (undirected) degree
        G_undirected = self.G.to_undirected()
        undirected_degree_distribution = G_undirected.degree()
        # unweighted cluster
        degree_unweighted_clusters = []
        keys = undirected_degree_distribution.keys()
        for key in keys:
            degree = undirected_degree_distribution[key]
            unweighted_cluster = self.unweighted_clustering_distribution[key]
            degree_unweighted_clusters.append([degree, unweighted_cluster])
        C_k_unweighted = self.Stats.get_dependency(degree_unweighted_clusters)
        C_k_unweighted_agg = self.Stats.aggregate_distribution(
            C_k_unweighted, self.aggregate_number)
        transversal_measures.append(C_k_unweighted_agg)
        transversal_measures.append(C_k_unweighted[2])
        self.Stats.kolmogorov_smirnov(C_k_unweighted[1], C_k_unweighted_agg[1],
                                      continuous)
        self.Stats.r_square([x[0] for x in degree_unweighted_clusters],
                            [x[1] for x in degree_unweighted_clusters])
        # weighted cluster (same undirected key set)
        degree_weighted_clusters = []
        for key in keys:
            degree = undirected_degree_distribution[key]
            weighted_cluster = self.weighted_clustering_distribution[key]
            degree_weighted_clusters.append([degree, weighted_cluster])
        C_k_weighted = self.Stats.get_dependency(degree_weighted_clusters)
        C_k_weighted_agg = self.Stats.aggregate_distribution(
            C_k_weighted, self.aggregate_number)
        transversal_measures.append(C_k_weighted_agg)
        transversal_measures.append(C_k_weighted[2])
        self.Stats.kolmogorov_smirnov(C_k_weighted[1], C_k_weighted_agg[1],
                                      continuous)
        self.Stats.r_square([x[0] for x in degree_weighted_clusters],
                            [x[1] for x in degree_weighted_clusters])
        # - Vij: link weight as a function of Ki * Kj of its endpoints
        edges_volume_degree = []
        for edge in self.G.edges(data=True):
            node1_degree = self.out_degree_distribution[edge[0]]
            node2_degree = self.in_degree_distribution[edge[1]]
            weight = edge[2][self.weight_id]
            edges_volume_degree.append([node1_degree * node2_degree, weight])
        volume_end_point_degree = self.Stats.get_dependency(
            edges_volume_degree)
        transversal_measures.append(volume_end_point_degree)
        # - Knn: average neighbour degree vs degree, unweighted
        average_neighbor_degrees = nx.average_neighbor_degree(self.G)
        average_neighbor_degree_k = []
        for key in keys:
            degree = undirected_degree_distribution[key]
            average_neighbor_degree = average_neighbor_degrees[key]
            average_neighbor_degree_k.append([degree, average_neighbor_degree])
        average_neighbor_degree_k_dep = self.Stats.get_dependency(
            average_neighbor_degree_k)
        # adding to the general values
        [average_neighbor_degree_mean, average_neighbor_degree_sd
         ] = self.Stats.get_mean_sd(average_neighbor_degrees)
        self.features[0].append(average_neighbor_degree_mean)
        self.features[0].append(average_neighbor_degree_sd)
        average_neighbor_degree_k_agg = self.Stats.aggregate_distribution(
            average_neighbor_degree_k_dep, self.aggregate_number)
        transversal_measures.append(average_neighbor_degree_k_agg)
        transversal_measures.append(average_neighbor_degree_k_dep[2])
        self.Stats.kolmogorov_smirnov(average_neighbor_degree_k_dep[1],
                                      average_neighbor_degree_k_agg[1],
                                      continuous)
        self.Stats.r_square([x[0] for x in average_neighbor_degree_k],
                            [x[1] for x in average_neighbor_degree_k])
        # - Knn weighted
        average_neighbor_degrees_weighted = nx.average_neighbor_degree(
            self.G, weight=self.weight_id)
        average_neighbor_degree_weighted_k = []
        for key in keys:
            degree = undirected_degree_distribution[key]
            average_neighbor_degree_weighted = average_neighbor_degrees_weighted[
                key]
            average_neighbor_degree_weighted_k.append(
                [degree, average_neighbor_degree_weighted])
        average_neighbor_degree_weighted_k_dep = self.Stats.get_dependency(
            average_neighbor_degree_weighted_k)
        # adding to the general values
        [
            average_neighbor_degree_weighted_mean,
            average_neighbor_degree_weighted_sd
        ] = self.Stats.get_mean_sd(average_neighbor_degrees_weighted)
        self.features[0].append(average_neighbor_degree_weighted_mean)
        self.features[0].append(average_neighbor_degree_weighted_sd)
        average_neighbor_degree_weighted_k_agg = self.Stats.aggregate_distribution(
            average_neighbor_degree_weighted_k_dep, self.aggregate_number)
        transversal_measures.append(average_neighbor_degree_weighted_k_agg)
        transversal_measures.append(average_neighbor_degree_weighted_k_dep[2])
        self.Stats.kolmogorov_smirnov(
            average_neighbor_degree_weighted_k_dep[1],
            average_neighbor_degree_weighted_k_agg[1], continuous)
        self.Stats.r_square([x[0] for x in average_neighbor_degree_weighted_k],
                            [x[1] for x in average_neighbor_degree_weighted_k])
        self.features.append(transversal_measures)

    # Giving work to Matlab
    def save_extra(self):
        """Persist the stored KS samples for external (Matlab) processing."""
        self.Stats.save_ks_s()
class NetworkHandler:
    """Battery of topological and weighted statistics for a directed network
    read from a GEXF file.

    Each analysis method returns a ``[values, header_text]`` pair so the
    caller can assemble a CSV row; complete distributions are also handed to
    ``StatsHandler`` for Kolmogorov-Smirnov / R-square post-processing.  The
    single-character ``'A'`` entries appended below are placeholder columns
    whose real values come from that external (Matlab) analysis.

    NOTE(review): written against the networkx 1.x API (``degree()``
    returning a dict, ``nodes()``/``edges()`` returning lists,
    ``*_connected_component_subgraphs()`` returning lists) -- confirm the
    pinned networkx version before upgrading.
    """

    # -------------------------------------------------------------
    #
    # init (directory, name, weight_id, aggregate_number)
    #
    # -------------------------------------------------------------
    def __init__(self, directory, name, weight_id, aggregate_number):
        self.name = name
        self.directory = directory
        # the graph is loaded once from <directory><name>.gexf
        self.G = nx.read_gexf(self.directory + self.name + '.gexf')
        self.weight_id = weight_id            # edge-attribute key holding the weight
        self.features = []                    # rows of computed feature values
        self.aggregate_number = aggregate_number
        self.Stats = StatsHandler(name)
        # CSV header suffix shared by every distribution summary
        self.standard_text_distribution = ',standard deviation,skewness,kurtosis,hhi,q90%,q80%,q70%,q60%,q50%,q40%,q30%,q20%,q10%,q5%,q1%'

    # -------------------------------------------------------------
    #
    # set_general_values()
    #
    # -------------------------------------------------------------
    def set_general_values(self):
        """Return [[n_nodes, n_edges, n_lenders, n_borrowers], header_text]."""
        general_values = []
        # size
        general_values.append(len(self.G.nodes()))
        txt = ',number of nodes'
        # edges
        general_values.append(len(self.G.edges()))
        txt += ',number of edges'
        # lenders: nodes with at least one outgoing edge
        nb_lenders = sum(1 for deg in self.G.out_degree().values() if deg > 0)
        general_values.append(nb_lenders)
        txt += ',number of lenders'
        # borrowers: nodes with at least one incoming edge
        nb_borrowers = sum(1 for deg in self.G.in_degree().values() if deg > 0)
        general_values.append(nb_borrowers)
        txt += ',number of borrowers'
        return [general_values, txt]

    # -------------------------------------------------------------
    #
    # set_degree_analysis()
    # computes cumulative distribution for all - in - out,
    # the in/out degree correlation, assortativity and reciprocity
    #
    # -------------------------------------------------------------
    def set_degree_analysis(self):
        """Degree statistics; also caches ``self.degree_distribution``,
        ``self.in_degree_distribution`` and ``self.out_degree_distribution``
        for the later methods that rely on them."""
        degree_analysis = []
        txt = ''
        # TOTAL
        self.degree_distribution = self.G.degree()
        statistics = self.Stats.get_distribution_info(self.degree_distribution)
        # storing complete distribution for statistical analysis
        self.Stats.ks_store(self.degree_distribution, "total degree distribution")
        degree_analysis.extend(statistics[:5])
        degree_analysis.extend(statistics[5])
        txt += ',average degree' + self.standard_text_distribution
        # IN
        self.in_degree_distribution = self.G.in_degree()
        statistics = self.Stats.get_distribution_info(self.in_degree_distribution)
        # storing complete distribution for statistical analysis
        self.Stats.ks_store(self.in_degree_distribution, "in degree distribution")
        degree_analysis.extend(statistics[:5])
        degree_analysis.extend(statistics[5])
        txt += ',average in degree' + self.standard_text_distribution
        # OUT
        self.out_degree_distribution = self.G.out_degree()
        statistics = self.Stats.get_distribution_info(self.out_degree_distribution)
        # storing complete distribution for statistical analysis
        self.Stats.ks_store(self.out_degree_distribution, "out degree distribution")
        degree_analysis.extend(statistics[:5])
        degree_analysis.extend(statistics[5])
        txt += ',average out degree' + self.standard_text_distribution
        # CORRELATION between in- and out-degree; 'A' is a placeholder
        # column, the fit itself is done by the Stats post-processing
        keys = self.G.nodes()
        d_in = [self.in_degree_distribution[key] for key in keys]
        d_out = [self.out_degree_distribution[key] for key in keys]
        self.Stats.r_square(d_in, d_out, "degree correlation" )
        degree_analysis.extend('A')
        txt += ',correlation in out degree'
        # ASSORTATIVITY: degree of source vs degree of target per edge
        d_1 = []
        d_2 = []
        for edge in self.G.edges():
            d_1.append(self.degree_distribution[edge[0]])
            d_2.append(self.degree_distribution[edge[1]])
        self.Stats.r_square(d_1, d_2, "degree assortativity" )
        degree_analysis.extend('A')
        txt += ',assortativity'
        # RECIPROCITY: density-corrected correlation between the adjacency
        # matrix and its transpose
        nodes = self.G.nodes()  # hoisted: the original rebuilt this list inside the O(n^2) loop
        n_nodes = len(nodes)
        density = float(len(self.G.edges())) / (n_nodes * (n_nodes - 1))
        reciprocal_value_num = 0.0
        reciprocal_value_den = 0.0
        for i in range(n_nodes):
            for j in range(n_nodes):
                if i != j:
                    a_ij = 1 if self.G.has_edge(nodes[i], nodes[j]) else 0
                    a_ji = 1 if self.G.has_edge(nodes[j], nodes[i]) else 0
                    reciprocal_value_num += float(a_ij - density) * float(a_ji - density)
                    reciprocal_value_den += (a_ij - density) * (a_ij - density)
        reciprocal_value = float(reciprocal_value_num) / reciprocal_value_den
        degree_analysis.extend([reciprocal_value])
        txt += ',reciprocity'
        return [degree_analysis, txt]

    # -------------------------------------------------------------
    #
    # set_volume_distribution()
    #
    # -------------------------------------------------------------
    def set_volume_distribution(self):
        """Node-strength (volume) statistics: total, in and out weighted
        degree per node, plus the in/out volume correlation.

        BUGFIX: the original tested ``node in edge[1]`` -- a *substring*
        match on string GEXF node ids (node '1' also matched '12') -- and
        re-scanned the whole edge list once per node.  Volumes are now
        accumulated in a single O(E) pass with exact id comparison.
        """
        volume_analysis = []
        txt = ''
        nodes = self.G.nodes()
        self.volume_distribution = dict.fromkeys(nodes, 0.0)
        self.in_volume_distribution = dict.fromkeys(nodes, 0.0)
        self.out_volume_distribution = dict.fromkeys(nodes, 0.0)
        for edge in self.G.edges(data = True):
            source, target, weight = edge[0], edge[1], edge[2][self.weight_id]
            self.out_volume_distribution[source] += weight
            self.in_volume_distribution[target] += weight
            # a self-loop contributes only once to its node's total volume,
            # matching the original one-test-per-node accumulation
            self.volume_distribution[source] += weight
            if source != target:
                self.volume_distribution[target] += weight
        # TOTAL
        total_volume = sum(self.volume_distribution.values())
        volume_analysis.append(total_volume)
        statistics = self.Stats.get_distribution_info(self.volume_distribution)
        # storing complete distribution for statistical analysis
        self.Stats.ks_store(self.volume_distribution, "total volume distribution")
        volume_analysis.extend(statistics[:5])
        volume_analysis.extend(statistics[5])
        txt += ',full volume, average volume' + self.standard_text_distribution
        # IN
        total_volume_in = sum(self.in_volume_distribution.values())
        volume_analysis.append(total_volume_in)
        statistics = self.Stats.get_distribution_info(self.in_volume_distribution)
        # storing complete distribution for statistical analysis
        self.Stats.ks_store(self.in_volume_distribution, "total in volume distribution")
        volume_analysis.extend(statistics[:5])
        volume_analysis.extend(statistics[5])
        txt += ',full in volume, average in volume' + self.standard_text_distribution
        # OUT
        total_volume_out = sum(self.out_volume_distribution.values())
        volume_analysis.append(total_volume_out)
        statistics = self.Stats.get_distribution_info(self.out_volume_distribution)
        # storing complete distribution for statistical analysis
        self.Stats.ks_store(self.out_volume_distribution, "total out volume distribution")
        volume_analysis.extend(statistics[:5])
        volume_analysis.extend(statistics[5])
        txt += ',full out volume, average out volume' + self.standard_text_distribution
        # correlation between in and out volume ('A' = placeholder column)
        v_in = [self.in_volume_distribution[key] for key in nodes]
        v_out = [self.out_volume_distribution[key] for key in nodes]
        self.Stats.r_square(v_in, v_out, "volume correlation" )
        volume_analysis.extend('A')
        # sic: header typo kept byte-for-byte so downstream CSV parsers
        # keyed on this column name keep working
        txt += ',correlatin in out volume'
        return [volume_analysis, txt]

    # -------------------------------------------------------------
    #
    # set_clustering_distribution ()
    #
    # -------------------------------------------------------------
    def set_clustering_distribution(self):
        """Clustering-coefficient statistics on the undirected projection.

        The weighted variant is computed and cached (``transversal_measures``
        reads it) but its summary export was deliberately disabled in the
        original; that choice is preserved.
        """
        # clustering is only defined on the undirected projection
        G_undirected = self.G.to_undirected()
        clustering_distributions = []
        txt = ''
        # unweighted
        self.unweighted_clustering_distribution = nx.clustering(G_undirected)
        statistics = self.Stats.get_distribution_info(self.unweighted_clustering_distribution)
        # storing complete distribution for statistical analysis
        self.Stats.ks_store(self.unweighted_clustering_distribution, "unweighted clustering distribution")
        clustering_distributions.extend(statistics[:5])
        clustering_distributions.extend(statistics[5])
        txt += ',average clustering coeficient (unweighted)' + self.standard_text_distribution
        # weighted: cached for transversal_measures only, not exported
        self.weighted_clustering_distribution = nx.clustering(G_undirected, G_undirected.nodes(), self.weight_id)
        return [clustering_distributions, txt]

    # -------------------------------------------------------------
    #
    # centrality_measures()
    #
    # -------------------------------------------------------------
    def centrality_measures(self):
        """Centrality statistics: betweenness, closeness and right/left
        eigenvector centrality.

        Eigenvector centrality may fail to converge; zero placeholders of
        matching width are then emitted so the CSV columns stay aligned.
        """
        centrality_measures = []
        txt = ''
        # betweenness (unweighted; the weighted variant is cached only,
        # its summary export was deliberately disabled in the original)
        self.unweighted_betweenness_distribution = nx.betweenness_centrality(self.G)
        statistics = self.Stats.get_distribution_info(self.unweighted_betweenness_distribution)
        centrality_measures.extend(statistics[:5])
        centrality_measures.extend(statistics[5])
        txt += ',average betweenness centrality (unweighted)' + self.standard_text_distribution
        self.weighted_betweenness_distribution = nx.betweenness_centrality(self.G, weight = self.weight_id)
        # closeness (unweighted)
        self.unweighted_closeness_distribution = nx.closeness_centrality(self.G)
        statistics = self.Stats.get_distribution_info(self.unweighted_closeness_distribution)
        centrality_measures.extend(statistics[:5])
        centrality_measures.extend(statistics[5])
        txt += ',average closeness centrality (unweighted)' + self.standard_text_distribution
        # eigenvector -- right
        try:
            self.right_eigenvector_distribution = nx.eigenvector_centrality(self.G)
            statistics = self.Stats.get_distribution_info(self.right_eigenvector_distribution)
            centrality_measures.extend(statistics[:5])
            centrality_measures.extend(statistics[5])
        except Exception:  # narrowed from a bare except: power iteration may not converge
            # `statistics` still holds the closeness summary, so the zero
            # padding below has the right number of percentile columns
            centrality_measures.extend([0, 0, 0, 0, 0])
            centrality_measures.extend([0] * len(statistics[5]))
        txt += ',average right eigenvector' + self.standard_text_distribution
        # eigenvector -- left (= right eigenvector of the reversed graph)
        try:
            G_rev = self.G.reverse()
            self.lef_eigenvector_distribution = nx.eigenvector_centrality(G_rev)
            statistics = self.Stats.get_distribution_info(self.lef_eigenvector_distribution)
            centrality_measures.extend(statistics[:5])
            centrality_measures.extend(statistics[5])
        except Exception:
            centrality_measures.extend([0, 0, 0, 0, 0])
            centrality_measures.extend([0] * len(statistics[5]))
        txt += ',average left eigenvector' + self.standard_text_distribution
        return [centrality_measures, txt]

    # -------------------------------------------------------------
    #
    # transversal_measures()
    #
    # -------------------------------------------------------------
    def transversal_measures(self):
        """Cross-measure correlations: volume vs degree (total/in/out),
        clustering vs degree (unweighted/weighted), and edge weight vs the
        product of end-point degrees.  Fits are delegated to the
        StatsHandler; each 'A' is a placeholder column.

        Requires set_degree_analysis, set_volume_distribution and
        set_clustering_distribution to have run first (reads their caches).
        """
        transversal_measures = []
        txt = ''
        # - V(k) : volume as a function of degree
        # all
        title = "Vol(k) all"
        degrees = []
        volumes = []
        for key in self.degree_distribution.keys():
            degrees.append(self.degree_distribution[key])
            volumes.append(self.volume_distribution[key])
        self.Stats.r_square(degrees, volumes, title )
        transversal_measures.extend('A')
        # - in
        title = "Vol(k) in"
        in_degrees = []
        in_volumes = []
        for key in self.in_degree_distribution.keys():
            in_degrees.append(self.in_degree_distribution[key])
            in_volumes.append(self.in_volume_distribution[key])
        self.Stats.r_square(in_degrees, in_volumes, title)
        transversal_measures.extend('A')
        # - out
        title = "Vol(k) out"
        out_degrees = []
        out_volumes = []
        for key in self.out_degree_distribution.keys():
            out_degrees.append(self.out_degree_distribution[key])
            out_volumes.append(self.out_volume_distribution[key])
        self.Stats.r_square(out_degrees, out_volumes, title)
        transversal_measures.extend('A')
        # - C(k) : clustering as a function of (undirected) degree
        G_undirected = self.G.to_undirected()
        undirected_degree_distribution = G_undirected.degree()
        # unweighted cluster
        title = "C(k) unweighted"
        degrees = []
        unweighted_clusters = []
        for key in undirected_degree_distribution.keys():
            degrees.append(undirected_degree_distribution[key])
            unweighted_clusters.append(self.unweighted_clustering_distribution[key])
        self.Stats.r_square(degrees, unweighted_clusters, title)
        transversal_measures.extend('A')
        # weighted cluster
        title = "C(k) weighted"
        degrees = []
        weighted_clusters = []
        for key in self.degree_distribution.keys():
            degrees.append(undirected_degree_distribution[key])
            weighted_clusters.append(self.weighted_clustering_distribution[key])
        self.Stats.r_square(degrees, weighted_clusters, title)
        transversal_measures.extend('A')
        # - Vij : edge weight vs product of end-point degrees
        title = "Vij(kikj) with no aggregation"
        edges_volumes = []
        degrees = []
        for edge in self.G.edges(data = True):
            node1_degree = self.out_degree_distribution[edge[0]]
            node2_degree = self.in_degree_distribution[edge[1]]
            weight = edge[2][self.weight_id]
            edges_volumes.append(weight)
            degrees.append(node1_degree * node2_degree)
        self.Stats.r_square(degrees, edges_volumes, title)
        transversal_measures.extend('A')
        txt += ',correlation total volume degree,correlation in volume degree,correlation out volume degree,correlation unweighted cluster degree,correlation weighted cluster degree,correlation weight end degree product'
        return [transversal_measures, txt]

    # -------------------------------------------------------------
    #
    # scc_analysis()
    #
    # -------------------------------------------------------------
    def scc_analysis(self):
        """Connected-component / bow-tie statistics: WCC counts and largest
        WCC, SCC counts and largest SCC, the bow-tie split (LSCC / IN /
        OUT), and centralities restricted to the LSCC.

        Requires set_volume_distribution to have run first (reads the
        cached volume distributions).

        BUGFIXES vs the original:
        * the bow-tie edge scan stopped at ``len(edges) - 1`` and never
          examined the last edge;
        * the IN and OUT sections exported the LSCC's ``l`` and
          ``volume_edges`` instead of the freshly computed ``n_links`` and
          ``volume`` (which were dead variables);
        * the "KATZ" block called ``nx.eigenvector_centrality``; it now
          calls ``nx.katz_centrality`` as the header column promises.
        """
        scc_stats = []
        txt = ''
        # WCC analysis
        wccs = nx.weakly_connected_component_subgraphs(self.G)
        n_wcc = len(wccs)
        scc_stats.append(n_wcc)
        txt += ',number of wccs'
        nodes_in_lwcc = nx.weakly_connected_components(self.G)[0]
        size = len(self.G.nodes())
        # share of nodes in the largest WCC
        share = float(len(nodes_in_lwcc)) / size
        lwcc = wccs[0]
        avg_shortest_path_lentgh = nx.average_shortest_path_length(lwcc)
        scc_stats.extend([share, avg_shortest_path_lentgh])
        txt += ',LWCC - share of WCC,LWCC - shortest path length'
        # size, links and total edge volume of the LWCC
        n = len(nodes_in_lwcc)
        l = len(lwcc.edges())
        volume = 0.0
        for edge in lwcc.edges(data = True):
            volume += edge[2][self.weight_id]
        scc_stats.extend([n, l, volume])
        txt += ',number of nodes,number of links,total volume'
        # LSCC analysis
        sccs = nx.strongly_connected_component_subgraphs(self.G)
        n_scc = len(sccs)
        scc_stats.append(n_scc)
        txt += ',number of sccs'
        # Bow-tie: classify each node outside the LSCC as IN (its first
        # LSCC-touching edge points into the LSCC) or OUT (points out of it)
        nodes_in_lscc = nx.strongly_connected_components(self.G)[0]
        lscc_set = set(nodes_in_lscc)            # O(1) membership tests
        other_nodes = list(set(self.G.nodes()) ^ lscc_set)
        edges = self.G.edges()                   # hoisted out of the per-node loop
        in_nodes = []
        out_nodes = []
        for node in other_nodes:
            # fixed: the original while loop used `i < len(edges) - 1`
            # and silently skipped the last edge
            for edge in edges:
                if node in edge:
                    if edge[1] in lscc_set:
                        in_nodes.append(node)
                        break
                    elif edge[0] in lscc_set:
                        out_nodes.append(node)
                        break
        size = len(self.G.nodes())
        # SCC: share, path length and diameter of the largest SCC
        share = float(len(nodes_in_lscc)) / size
        lscc = sccs[0]
        avg_shortest_path_lentgh = nx.average_shortest_path_length(lscc)
        diameter = nx.diameter(lscc)
        scc_stats.extend([share, avg_shortest_path_lentgh, diameter])
        txt += ',LSCC - share of scc,LSCC - shortest path lentgh,LSCC - diameter'
        # nodes, links and volumes within the LSCC
        n = len(nodes_in_lscc)
        l = len(lscc.edges())
        volume_edges = 0.0
        for edge in lscc.edges(data = True):
            volume_edges += edge[2][self.weight_id]
        total_volume_nodes = 0.0
        in_volume_nodes = 0.0
        out_volume_nodes = 0.0
        for node in lscc.nodes():
            total_volume_nodes += self.volume_distribution[node]
            in_volume_nodes += self.in_volume_distribution[node]
            out_volume_nodes += self.out_volume_distribution[node]
        scc_stats.extend([n, l, volume_edges, total_volume_nodes, in_volume_nodes, out_volume_nodes])
        txt += ',number of nodes, number of links,volume edges, total volume nodes, in volume nodes, out volume nodes'
        # IN component
        share = float(len(in_nodes)) / size
        n = len(in_nodes)
        in_set = set(in_nodes)
        n_links = 0
        volume = 0.0
        for edge in self.G.edges(data = True):
            # links going from the IN component into the LSCC
            if edge[0] in in_set and edge[1] in lscc_set:
                n_links += 1
                volume += edge[2][self.weight_id]
        total_volume_nodes = 0.0
        in_volume_nodes = 0.0
        out_volume_nodes = 0.0
        for node in in_nodes:
            total_volume_nodes += self.volume_distribution[node]
            in_volume_nodes += self.in_volume_distribution[node]
            out_volume_nodes += self.out_volume_distribution[node]
        # fixed: the original exported the LSCC's l / volume_edges here
        scc_stats.extend([share, n, n_links, volume, total_volume_nodes, in_volume_nodes, out_volume_nodes])
        txt += ',LSCC - share IN,number of nodes,number of links,volume edges,total volume nodes, in volume nodes, out volume nodes'
        # OUT component
        share = float(len(out_nodes)) / size
        n = len(out_nodes)
        out_set = set(out_nodes)
        n_links = 0
        volume = 0.0
        for edge in self.G.edges(data = True):
            # links going from the LSCC into the OUT component
            if edge[0] in lscc_set and edge[1] in out_set:
                n_links += 1
                volume += edge[2][self.weight_id]
        total_volume_nodes = 0.0
        in_volume_nodes = 0.0
        out_volume_nodes = 0.0
        for node in out_nodes:
            total_volume_nodes += self.volume_distribution[node]
            in_volume_nodes += self.in_volume_distribution[node]
            out_volume_nodes += self.out_volume_distribution[node]
        # fixed: the original exported the LSCC's l / volume_edges here
        scc_stats.extend([share, n, n_links, volume, total_volume_nodes, in_volume_nodes, out_volume_nodes])
        txt += ',LSCC - share OUT,number of nodes,number of links,volume edges,total volume nodes, in volume nodes, out volume nodes'
        # EIGENVECTOR IN LSCC
        # right
        try:
            self.right_eigenvector_distribution_lscc = nx.eigenvector_centrality(lscc)
            statistics = self.Stats.get_distribution_info(self.right_eigenvector_distribution_lscc)
            scc_stats.extend(statistics[:5])
            scc_stats.extend(statistics[5])
        except Exception:  # narrowed from a bare except: may not converge
            scc_stats.extend([0, 0, 0, 0, 0])
            # MAKE THE NUMBER OF PERCENTILES VARIABLE!
            scc_stats.extend([0] * 11)
        txt += ',average right eigenvector lscc' + self.standard_text_distribution
        # left
        try:
            lscc_rev = lscc.reverse()
            self.lef_eigenvector_distribution_lscc = nx.eigenvector_centrality(lscc_rev)
            statistics = self.Stats.get_distribution_info(self.lef_eigenvector_distribution_lscc)
            scc_stats.extend(statistics[:5])
            scc_stats.extend(statistics[5])
        except Exception:
            scc_stats.extend([0, 0, 0, 0, 0])
            scc_stats.extend([0] * 11)
        txt += ',average left eigenvector lscc' + self.standard_text_distribution
        # KATZ IN LSCC (fixed: the original called eigenvector_centrality)
        try:
            self.katz_distribution_lscc = nx.katz_centrality(lscc)
            statistics = self.Stats.get_distribution_info(self.katz_distribution_lscc)
            scc_stats.extend(statistics[:5])
            scc_stats.extend(statistics[5])
        except Exception:
            scc_stats.extend([0, 0, 0, 0, 0])
            # MAKE THE NUMBER OF PERCENTILES VARIABLE!
            scc_stats.extend([0] * 11)
        txt += ',average katz centrality' + self.standard_text_distribution
        return [scc_stats, txt]

    # Giving work to Matlab
    def save_extra(self):
        """Flush the stored distributions for external (Matlab) processing."""
        self.Stats.save_ks_s()