def test_clustering_score(self):
    """
    Check the reported global clustering score against a hand-rolled one.

    The global score is the mean of the local clustering coefficient of
    every node v:

                    2 Nv          where Kv = degree of v
        C(v) = -------------            Nv = number of edges between
                Kv (Kv - 1)                  the neighbors of v

    Nodes with fewer than two neighbors contribute a local score of 0.
    """
    fixture_path = os.path.join(self._fixtures_dir, 'les-miserables.csv')
    summary = ctd.get_summary(fixture_path)
    network = ctd.get_graph(fixture_path)

    coefficients = []
    for node in network.nodes():
        degree = network.degree(node)

        # Every node shared by `node` and one of its neighbors closes a
        # triangle, i.e. marks an edge between two neighbors of `node`.
        # Sorting the endpoints makes (a, b) and (b, a) collapse in the set.
        linked_pairs = set()
        for neighbor in nx.all_neighbors(network, node):
            for shared in nx.common_neighbors(network, neighbor, node):
                linked_pairs.add(tuple(sorted((neighbor, shared))))
        edge_count = len(linked_pairs)

        if degree > 1:
            coefficients.append(2 * edge_count / float(degree * (degree - 1)))
        else:
            # The formula is undefined (division by zero) for degree 0 or 1.
            coefficients.append(0)

    expected = sum(coefficients) / float(len(coefficients))
    self.assertAlmostEqual(summary['clustering'], expected)
def test_centrality_scores(self):
    """
    Test betweenness centrality with generalized formula

    For a node v and every other node pair (s, t), we take the proportion
    of shortest paths s => t that include v, and then normalize the sum of
    all the proportions by dividing by (N - 1)(N - 2) / 2, the number of
    node pairs that exclude v.

    The top row of the summary table is expected to be 'Valjean', and its
    reported centrality must match the value computed here.
    """
    from itertools import combinations

    test_data_path = os.path.join(self._fixtures_dir, 'les-miserables.csv')
    results = ctd.get_summary(test_data_path)
    graph = ctd.get_graph(test_data_path)

    table = results['table']
    self.assertEqual(table[0]['id'], 'Valjean')

    # Copy into a list before removing: networkx >= 2.0 returns a NodeView
    # from graph.nodes(), which has no remove(); older versions return a
    # list, for which list() is a harmless copy.
    nodes = list(graph.nodes())
    nodes.remove('Valjean')

    betweenness_centrality = 0.0
    # combinations() yields each unordered pair exactly once, with the
    # earlier-listed node first, so no bookkeeping of visited pairs is needed.
    for u, v in combinations(nodes, 2):
        paths = list(nx.all_shortest_paths(graph, u, v))
        # sum() over a generator replaces reduce(), which is not a builtin
        # on Python 3.
        paths_with_valjean = sum(1 for path in paths if 'Valjean' in path)
        betweenness_centrality += paths_with_valjean / float(len(paths))

    node_pairs = len(nodes) * (len(nodes) - 1) / float(2)
    normalized_score = betweenness_centrality / node_pairs
    self.assertAlmostEqual(table[0]['centrality'], normalized_score)
def test_centrality_scores(self):
    """
    Test betweenness centrality with generalized formula

    For a node v and every other node pair (s, t), we take the proportion
    of shortest paths s => t that include v, and then normalize the sum of
    all the proportions by dividing by (N - 1)(N - 2) / 2, the number of
    node pairs that exclude v.

    The top row of the summary table is expected to be "Valjean", and its
    reported centrality must match the value computed here.
    """
    from itertools import combinations

    test_data_path = os.path.join(self._fixtures_dir, "les-miserables.csv")
    results = ctd.get_summary(test_data_path)
    graph = ctd.get_graph(test_data_path)

    table = results["table"]
    self.assertEqual(table[0]["id"], u"Valjean")

    # Copy into a list before removing: networkx >= 2.0 returns a NodeView
    # from graph.nodes(), which has no remove(); older versions return a
    # list, for which list() is a harmless copy.
    nodes = list(graph.nodes())
    nodes.remove(u"Valjean")

    betweenness_centrality = 0.0
    # combinations() yields each unordered pair exactly once, with the
    # earlier-listed node first, so no bookkeeping of visited pairs is needed.
    for u, v in combinations(nodes, 2):
        paths = list(nx.all_shortest_paths(graph, u, v))
        # sum() over a generator replaces reduce(), which is not a builtin
        # on Python 3.
        paths_with_valjean = sum(1 for path in paths if u"Valjean" in path)
        betweenness_centrality += paths_with_valjean / float(len(paths))

    node_pairs = len(nodes) * (len(nodes) - 1) / float(2)
    normalized_score = betweenness_centrality / node_pairs
    self.assertAlmostEqual(table[0]["centrality"], normalized_score)
def test_clustering_score(self):
    """
    Check the reported global clustering score against a hand-rolled one.

    The global score is the mean of the local clustering coefficient of
    every node v:

                    2 Nv          where Kv = degree of v
        C(v) = -------------            Nv = number of edges between
                Kv (Kv - 1)                  the neighbors of v

    Nodes with fewer than two neighbors contribute a local score of 0.
    """
    fixture_path = os.path.join(self._fixtures_dir, "les-miserables.csv")
    summary = ctd.get_summary(fixture_path)
    network = ctd.get_graph(fixture_path)

    coefficients = []
    for node in network.nodes():
        degree = network.degree(node)

        # Every node shared by `node` and one of its neighbors closes a
        # triangle, i.e. marks an edge between two neighbors of `node`.
        # Sorting the endpoints makes (a, b) and (b, a) collapse in the set.
        linked_pairs = set()
        for neighbor in nx.all_neighbors(network, node):
            for shared in nx.common_neighbors(network, neighbor, node):
                linked_pairs.add(tuple(sorted((neighbor, shared))))
        edge_count = len(linked_pairs)

        if degree > 1:
            coefficients.append(2 * edge_count / float(degree * (degree - 1)))
        else:
            # The formula is undefined (division by zero) for degree 0 or 1.
            coefficients.append(0)

    expected = sum(coefficients) / float(len(coefficients))
    self.assertAlmostEqual(summary["clustering"], expected)