def test_effective_size_undirected(self):
    G = self.G.copy()
    nx.set_edge_attributes(G, 1, "weight")
    effective_size = nx.effective_size(G, weight="weight")
    assert almost_equal(effective_size["G"], 4.67, places=2)
    assert almost_equal(effective_size["A"], 2.50, places=2)
    assert almost_equal(effective_size["C"], 1, places=2)

def test_effective_size_undirected(self):
    G = self.G.copy()
    nx.set_edge_attributes(G, 1, 'weight')
    effective_size = nx.effective_size(G, weight='weight')
    assert_almost_equal(round(effective_size['G'], 2), 4.67)
    assert_almost_equal(round(effective_size['A'], 2), 2.50)
    assert_almost_equal(round(effective_size['C'], 2), 1)

def test_effective_size_weighted_undirected(self):
    G = self.G.copy()
    nx.set_edge_attributes(G, 'weight', self.G_weights)
    effective_size = nx.effective_size(G, weight='weight')
    assert_almost_equal(round(effective_size['G'], 2), 5.47)
    assert_almost_equal(round(effective_size['A'], 2), 2.47)
    assert_almost_equal(round(effective_size['C'], 2), 1)

def test_effective_size_weighted_directed(self):
    D = self.D.copy()
    nx.set_edge_attributes(D, 'weight', self.D_weights)
    effective_size = nx.effective_size(D, weight='weight')
    assert_almost_equal(round(effective_size[0], 3), 1.567)
    assert_almost_equal(round(effective_size[1], 3), 1.083)
    assert_almost_equal(round(effective_size[2], 3), 1)

def test_effective_size_undirected(self):
    G = self.G.copy()
    nx.set_edge_attributes(G, 1, 'weight')
    effective_size = nx.effective_size(G, weight='weight')
    assert almost_equal(effective_size['G'], 4.67, places=2)
    assert almost_equal(effective_size['A'], 2.50, places=2)
    assert almost_equal(effective_size['C'], 1, places=2)

def test_effective_size_undirected(self):
    G = self.G.copy()
    nx.set_edge_attributes(G, 1, "weight")
    effective_size = nx.effective_size(G, weight="weight")
    assert effective_size["G"] == pytest.approx(4.67, abs=1e-2)
    assert effective_size["A"] == pytest.approx(2.50, abs=1e-2)
    assert effective_size["C"] == pytest.approx(1, abs=1e-2)

def test_effective_size_weighted_directed(self):
    D = self.D.copy()
    nx.set_edge_attributes(D, self.D_weights, 'weight')
    effective_size = nx.effective_size(D, weight='weight')
    assert_almost_equal(round(effective_size[0], 3), 1.567)
    assert_almost_equal(round(effective_size[1], 3), 1.083)
    assert_almost_equal(round(effective_size[2], 3), 1)

def test_effective_size_weighted_directed(self):
    D = self.D.copy()
    nx.set_edge_attributes(D, self.D_weights, "weight")
    effective_size = nx.effective_size(D, weight="weight")
    assert effective_size[0] == pytest.approx(1.567, abs=1e-3)
    assert effective_size[1] == pytest.approx(1.083, abs=1e-3)
    assert effective_size[2] == pytest.approx(1, abs=1e-3)

def test_effective_size_weighted_directed(self):
    D = self.D.copy()
    nx.set_edge_attributes(D, self.D_weights, "weight")
    effective_size = nx.effective_size(D, weight="weight")
    assert almost_equal(effective_size[0], 1.567, places=3)
    assert almost_equal(effective_size[1], 1.083, places=3)
    assert almost_equal(effective_size[2], 1, places=3)
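# A minimal, self-contained sketch (not part of the test classes above) of the
# unweighted shortcut those tests exercise: for an undirected, unweighted ego
# network, Borgatti's simplification gives effective size = n - 2*t/n, where n
# is the number of alters and t is the number of ties among them (ego excluded).
# The small graph below is a hypothetical example, not the fixture self.G used above.
import networkx as nx
import pytest

G = nx.Graph([("A", "B"), ("A", "C"), ("A", "D"), ("B", "C")])
alters = list(G.neighbors("A"))
n = len(alters)                            # 3 alters
t = G.subgraph(alters).number_of_edges()   # 1 tie among the alters (B-C)
assert nx.effective_size(G)["A"] == pytest.approx(n - 2 * t / n)  # ~2.33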
def calculate_networks_indicators(graph):
    """Compute basic network indicators for every node."""
    degree_centrality = nx.degree_centrality(graph)
    nodes = list(degree_centrality.keys())
    betweenness_centrality = nx.betweenness_centrality(graph, weight='weight')
    network_indicators = pd.DataFrame({
        'nodes': nodes,
        'degree_centrality': [degree_centrality[node] for node in nodes],
        'betweenness_centrality': [betweenness_centrality[node] for node in nodes]
    })
    network_indicators['local_reaching_centrality'] = [
        nx.local_reaching_centrality(graph, node, weight='weight') for node in nodes
    ]
    constraint = nx.constraint(graph, weight='weight')
    network_indicators['constraint'] = [constraint[node] for node in nodes]
    effective_size = nx.effective_size(graph, weight='weight')
    network_indicators['effective_size'] = [
        effective_size[node] for node in nodes
    ]
    triangles = nx.triangles(graph)
    network_indicators['triangles'] = [triangles[node] for node in nodes]
    clustering = nx.clustering(graph, weight='weight')
    network_indicators['clustering'] = [clustering[node] for node in nodes]

    # Mean tie strength: weighted degree divided by unweighted degree.
    weight_dict = {
        item[0]: item[1] for item in nx.degree(graph, weight='weight')
    }
    degree_dict = {item[0]: item[1] for item in nx.degree(graph)}
    average_weight_dict = {
        weight_key: (weight_dict[weight_key] / degree_dict[weight_key]
                     if degree_dict[weight_key] != 0 else 0)
        for weight_key in weight_dict.keys()
    }
    network_indicators['tie_strength'] = [
        average_weight_dict[node] for node in nodes
    ]
    network_indicators['number_of_node'] = nx.number_of_nodes(graph)
    network_indicators['density'] = nx.density(graph)
    cliques = nx.graph_clique_number(graph)
    if cliques >= 3:
        network_indicators['cliques'] = cliques
    else:
        network_indicators['cliques'] = 0
    network_indicators['efficiency'] = nx.global_efficiency(graph)
    network_indicators['isolates'] = nx.number_of_isolates(graph)
    network_indicators = network_indicators[[
        'nodes', 'degree_centrality', 'betweenness_centrality',
        'local_reaching_centrality', 'constraint', 'effective_size',
        'triangles', 'clustering', 'tie_strength', 'number_of_node',
        'density', 'cliques', 'efficiency', 'isolates'
    ]]
    return network_indicators
def __get_effective_size(graph: nx.Graph):
    # Keep only nodes with a positive effective size, sorted largest first.
    effective_size = dict(
        filter(lambda x: x[1] > 0, nx.effective_size(graph).items()))
    effective_size = {
        k: v
        for k, v in sorted(
            effective_size.items(), key=lambda item: item[1], reverse=True)
    }
    return effective_size
def effective_size(graph, nodes, year, indicator_type):
    effective_size = nx.effective_size(graph, weight='weight')
    data = pd.DataFrame({
        'nodes': nodes,
        'effective_size': [effective_size[node] for node in nodes]
    })
    # indicator_type '三年期' selects the 3-year window, otherwise the 5-year
    # window; the Chinese path segments are real directory names and are kept as-is.
    if indicator_type == '三年期':
        excel_path = '../data/生成数据/04关系矩阵_中间指标/三年期/' + str(year) + '-' + str(
            year + 2) + '年竞争关系矩阵'
    else:
        excel_path = '../data/生成数据/04关系矩阵_中间指标/五年期/' + str(year) + '-' + str(
            year + 4) + '年竞争关系矩阵'
    folder = os.path.exists(excel_path)
    if not folder:
        os.makedirs(excel_path)
    data.to_excel(excel_writer=excel_path + '/effective_size指标.xlsx', index=False)
    # Log that the effective_size indicator for this year has been computed.
    print(str(year) + '年' + 'effective_size' + '计算完毕!')
def calculate_key_inventor_hole_label(base_data_array, inventor_patents_rows_dict, key_inventor):
    """
    Compute the structural-hole efficiency and degree centrality of a key inventor.
    :param base_data_array: raw patent data
    :param inventor_patents_rows_dict: result of the previous step, used to collect all nodes
    :param key_inventor: the inventor whose ego network is built
    :return: [hole efficiency, degree of the key inventor]
    """
    inventor_data_rows = inventor_patents_rows_dict[key_inventor]
    # Collect every node in the key inventor's ego network.
    network_node_list = get_key_inventor_partner(base_data_array,
                                                 inventor_patents_rows_dict,
                                                 key_inventor)
    # Initialise an undirected network and add the nodes.
    key_inventor_network = nx.Graph()
    key_inventor_network.add_nodes_from(network_node_list)
    # Add an edge for every pair of co-inventors listed on the same patent.
    for row_index in inventor_data_rows:
        row_data = base_data_array[row_index]
        # Read the inventor column.
        inventor_value = row_data[const.INVENTOR_COL]
        if inventor_value:
            inventor_value_list = inventor_value.split(",")
            if inventor_value_list and len(inventor_value_list) > 1:
                # All 2-inventor combinations from inventor_value_list.
                for inventor_partner_array in itertools.combinations(
                        inventor_value_list, 2):
                    key_inventor_network.add_edge(inventor_partner_array[0],
                                                  inventor_partner_array[1])
    # Degree of the key inventor; isolated inventors get a sentinel efficiency value.
    key_inventor_degree = key_inventor_network.degree(key_inventor)
    if key_inventor_degree == 0:
        hole_effi = const.ALONE_NODE_HOLE_EFFI
    else:
        # Burt's efficiency: effective size divided by ego degree.
        hole_effi = nx.effective_size(
            key_inventor_network)[key_inventor] / key_inventor_degree
    return [hole_effi, key_inventor_degree]
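# Hedged, standalone illustration of the efficiency measure used above
# (effective size divided by ego degree, i.e. Burt's efficiency). The toy
# co-inventor graph below is hypothetical and unrelated to the patent data.
import networkx as nx

toy = nx.Graph([("ego", "p1"), ("ego", "p2"), ("ego", "p3"), ("p1", "p2")])
degree = toy.degree("ego")                            # 3 collaborators
efficiency = nx.effective_size(toy)["ego"] / degree   # (3 - 2/3) / 3 ≈ 0.78
print(round(efficiency, 2))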
def parse_all_metrics(api, edge_df, user_id, directory=None, long=False):
    '''
    Will get all Tier 3 metrics for a user_id

    Parameters
    ----------
    api : Tweepy API hook
    edge_df : Edgelist of Pandas DataFrame
    user_id : User ID string
    directory : Directory to look for data. The default is None.
    long : Whether to skip the metrics that take a long time
        (graph betweenness, ego effective size, Simmelian ties).
        The default is False.

    Returns
    -------
    Feature Data Frame
    '''
    import pandas as pd
    import twitter_col
    import json, io, gzip, os
    import time
    import progressbar
    import networkx as nx
    from collections import Counter
    import community
    import numpy as np

    # user_id = '1919751'
    G = nx.from_pandas_edgelist(edge_df,
                                'from',
                                'to',
                                edge_attr=['type'],
                                create_using=nx.DiGraph())
    # G = nx.gnp_random_graph(100, 0.4, seed=None, directed=True)
    G2 = G.to_undirected()
    # Largest connected component of the undirected graph.
    largest_component = G2.subgraph(max(nx.connected_components(G2), key=len))
    print("Nodes in largest compo:", len(largest_component.nodes))

    data = {
        "user_id": [],
        "scrape_date": [],
        "num_nodes": [],
        "num_links": [],
        "density": [],
        "isolates": [],
        "dyad_isolates": [],
        "triad_isolates": [],
        "compo_over_4": [],
        # "average_shortest_path_length": [],
        "clustering_coefficient": [],
        "transitivity": [],
        # "network_diameter": [],
        "reciprocity": [],
        "graph_degree_centrality": [],
        "graph_betweenness_centrality": [],
        "mean_eigen_centrality": [],
        "simmelian_ties": [],
        "triad_003": [],
        "triad_012": [],
        "triad_102": [],
        "triad_021D": [],
        "triad_021U": [],
        "triad_021C": [],
        "triad_111D": [],
        "triad_111U": [],
        "triad_030T": [],
        "triad_030C": [],
        "triad_201": [],
        "triad_120D": [],
        "triad_120U": [],
        "triad_120C": [],
        "triad_210": [],
        "triad_300": [],
        "num_louvaine_groups": [],
        "size_largest_louvaine_group": [],
        "ego_effective_size": []
    }

    # When long=True, the slow metrics are dropped from the output entirely.
    if long:
        data.pop("graph_betweenness_centrality")
        data.pop("ego_effective_size")
        data.pop("simmelian_ties")

    data['user_id'].append(user_id)
    data['scrape_date'].append(time.strftime('%Y%m%d-%H%M%S'))
    data['num_nodes'].append(nx.number_of_nodes(G))
    data['num_links'].append(nx.number_of_edges(G))
    data['density'].append(nx.density(G))

    compo_sizes = [
        len(c)
        for c in sorted(nx.connected_components(G2), key=len, reverse=True)
    ]
    compo_freq = Counter(compo_sizes)

    # print('isolates')
    data['isolates'].append(compo_freq[1])
    # print('triad_isolates')
    data['triad_isolates'].append(compo_freq[3])
    data['dyad_isolates'].append(compo_freq[2])
    data['compo_over_4'].append(len([x for x in compo_sizes if x > 3]))
    # print('shortest path')
    # data['average_shortest_path_length'].append(nx.average_shortest_path_length(largest_component))
    # print('clustering_coefficient')
    data['clustering_coefficient'].append(nx.average_clustering(G2))
    # print('transitivity')
    data['transitivity'].append(nx.transitivity(G))
    # print('diameter')
    # data['network_diameter'].append(nx.diameter(largest_component))
    # print('reciprocity')
    data['reciprocity'].append(nx.reciprocity(G))

    # print('effective size')
    if not long:
        if user_id in list(G.nodes):
            ef = nx.effective_size(G, nodes=[user_id])
            data['ego_effective_size'].append(ef[user_id])
        else:
            data['ego_effective_size'].append(0)

    # print('degree')
    data['graph_degree_centrality'].append(graph_centrality(G, kind='degree'))
    # print('betweenness')
    if not long:
        data['graph_betweenness_centrality'].append(
            graph_centrality(largest_component, kind='betweenness'))
    # print('eigen_centrality')
    try:
        eig = list(nx.eigenvector_centrality_numpy(G).values())
        data['mean_eigen_centrality'].append(np.mean(eig))
    except:
        data['mean_eigen_centrality'].append(0)

    # print('simmelian')
    if not long:
        data['simmelian_ties'].append(get_simmelian_ties(G, sparse=True))

    # print('census')
    census = nx.triadic_census(G)
    data['triad_003'].append(census['003'])
    data['triad_012'].append(census['012'])
    data['triad_102'].append(census['102'])
    data['triad_021D'].append(census['021D'])
    data['triad_021U'].append(census['021U'])
    data['triad_021C'].append(census['021C'])
    data['triad_111D'].append(census['111D'])
    data['triad_111U'].append(census['111U'])
    data['triad_030T'].append(census['030T'])
    data['triad_030C'].append(census['030C'])
    data['triad_201'].append(census['201'])
    data['triad_120D'].append(census['120D'])
    data['triad_120U'].append(census['120U'])
    data['triad_120C'].append(census['120C'])
    data['triad_210'].append(census['210'])
    data['triad_300'].append(census['300'])

    partition = community.best_partition(G2)
    p_df = pd.DataFrame.from_dict(partition, orient='index')
    # print('louvaine')
    data['num_louvaine_groups'].append(len(set(partition.values())))
    data['size_largest_louvaine_group'].append(p_df[0].value_counts().max())

    df = pd.DataFrame(data)
    return df
def test_effective_size_undirected_borgatti(self):
    effective_size = nx.effective_size(self.G)
    assert_almost_equal(round(effective_size['G'], 2), 4.67)
    assert_almost_equal(round(effective_size['A'], 2), 2.50)
    assert_almost_equal(round(effective_size['C'], 2), 1)

def test_effective_size_undirected_borgatti(self):
    effective_size = nx.effective_size(self.G)
    assert effective_size["G"] == pytest.approx(4.67, abs=1e-2)
    assert effective_size["A"] == pytest.approx(2.50, abs=1e-2)
    assert effective_size["C"] == pytest.approx(1, abs=1e-2)

def test_effective_size_directed(self):
    effective_size = nx.effective_size(self.D)
    assert_almost_equal(round(effective_size[0], 3), 1.167)
    assert_almost_equal(round(effective_size[1], 3), 1.167)
    assert_almost_equal(round(effective_size[2], 3), 1)

def test_effective_size_borgatti_isolated(self):
    G = self.G.copy()
    G.add_node(1)
    effective_size = nx.effective_size(G)
    assert_true(math.isnan(effective_size[1]))

def test_effective_size_isolated(self):
    G = self.G.copy()
    G.add_node(1)
    nx.set_edge_attributes(G, self.G_weights, 'weight')
    effective_size = nx.effective_size(G, weight='weight')
    assert_true(math.isnan(effective_size[1]))

def test_effective_size_directed(self):
    effective_size = nx.effective_size(self.D)
    assert almost_equal(effective_size[0], 1.167, places=3)
    assert almost_equal(effective_size[1], 1.167, places=3)
    assert almost_equal(effective_size[2], 1, places=3)

def test_effective_size_isolated(self):
    G = self.G.copy()
    G.add_node(1)
    nx.set_edge_attributes(G, 'weight', self.G_weights)
    effective_size = nx.effective_size(G, weight='weight')
    assert_true(math.isnan(effective_size[1]))

def test_effective_size_directed(self):
    effective_size = nx.effective_size(self.D)
    assert effective_size[0] == pytest.approx(1.167, abs=1e-3)
    assert effective_size[1] == pytest.approx(1.167, abs=1e-3)
    assert effective_size[2] == pytest.approx(1, abs=1e-3)
def features_part2(info):
    """
    Third set of features.
    """
    G = info['G']
    n = info['num_nodes']
    num_units = info['num_units']
    edges = info['edges']
    nedges = len(edges)
    H = G.to_undirected()

    res = dict()
    cc = nx.closeness_centrality(G)
    res['closeness_centrality'] = cc[n - 1]
    res['closeness_centrality_mean'] = np.mean(list(cc.values()))
    bc = nx.betweenness_centrality(G)
    res['betweenness_centrality_mean'] = np.mean(list(bc.values()))
    cfcc = nx.current_flow_closeness_centrality(H)
    res['current_flow_closeness_centrality_mean'] = np.mean(list(cfcc.values()))
    cfbc = nx.current_flow_betweenness_centrality(H)
    res['current_flow_betweenness_centrality_mean'] = np.mean(list(cfbc.values()))
    soc = nx.second_order_centrality(H)
    res['second_order_centrality_mean'] = np.mean(list(soc.values())) / n
    cbc = nx.communicability_betweenness_centrality(H)
    res['communicability_betweenness_centrality_mean'] = np.mean(list(cbc.values()))
    comm = nx.communicability(H)
    res['communicability'] = np.log(comm[0][n - 1])
    res['communicability_start_mean'] = np.log(np.mean(list(comm[0].values())))
    res['communicability_end_mean'] = np.log(np.mean(list(comm[n - 1].values())))
    res['radius'] = nx.radius(H)
    res['diameter'] = nx.diameter(H)
    res['local_efficiency'] = nx.local_efficiency(H)
    res['global_efficiency'] = nx.global_efficiency(H)
    res['efficiency'] = nx.efficiency(H, 0, n - 1)
    pgr = nx.pagerank_numpy(G)
    res['page_rank'] = pgr[n - 1]
    res['page_rank_mean'] = np.mean(list(pgr.values()))
    cnstr = nx.constraint(G)
    res['constraint_mean'] = np.mean(list(cnstr.values())[:-1])
    effsize = nx.effective_size(G)
    res['effective_size_mean'] = np.mean(list(effsize.values())[:-1])
    cv = np.array(list(nx.closeness_vitality(H).values()))
    cv[cv < 0] = 0
    res['closeness_vitality_mean'] = np.mean(cv) / n
    res['wiener_index'] = nx.wiener_index(H) / (n * (n - 1) / 2)
    A = nx.to_numpy_array(G)
    expA = expm(A)
    res['expA'] = np.log(expA[0, n - 1])
    res['expA_mean'] = np.log(np.mean(expA[np.triu_indices(n)]))
    return res
def test_effective_size_undirected_borgatti(self):
    effective_size = nx.effective_size(self.G)
    assert almost_equal(effective_size["G"], 4.67, places=2)
    assert almost_equal(effective_size["A"], 2.50, places=2)
    assert almost_equal(effective_size["C"], 1, places=2)