def test_article(self):
    """Unnormalized dispersion from 'u' matches the article's figures.

    On the graph built by ``small_ego_G()`` the pair ('u', 'h') must score 4
    and the pair ('u', 'b') must score 1.
    """
    ego_graph = small_ego_G()
    # Compute both scores up front, then check them.
    scores = {
        target: nx.dispersion(ego_graph, 'u', target, normalized=False)
        for target in ('h', 'b')
    }
    assert scores['h'] == 4
    assert scores['b'] == 1
def test_results_length(self):
    """Each call signature of ``nx.dispersion`` returns a result of the right size.

    * no node given      -> one entry per node of the graph
    * only ``u`` given   -> one entry for every node except ``u``
    * ``u`` and ``v``    -> a single float
    """
    graph = small_ego_G()
    node_count = len(graph)

    all_pairs = nx.dispersion(graph)
    from_u = nx.dispersion(graph, 'u')
    single_pair = nx.dispersion(graph, 'u', 'h')

    assert len(all_pairs) == node_count
    assert len(from_u) == node_count - 1
    # Deliberately an exact type check, not isinstance.
    assert type(single_pair) is float
def test_impossible_things(self):
    """No dispersion score on the karate club graph may be negative."""
    scores = nx.dispersion(nx.karate_club_graph())
    # ``scores`` maps node -> {other node -> dispersion}.
    for per_node in scores.values():
        for value in per_node.values():
            assert value >= 0
# get graph measures --------------------------

# order (# vertices) and size (# edges)
order = graph.order()
size = graph.size()

# degree distribution
# nx.degree() returns a plain dict on networkx 1.x but a DegreeView (which
# has no .values()) on 2.x and later; dict() yields a node -> degree mapping
# on both.
degrees = dict(nx.degree(graph))

# density
density = nx.density(graph)

# diameter
diameter = nx.diameter(graph)

# dispersion
dispersion = nx.dispersion(graph)

# betweenness
betweenness = nx.betweenness_centrality(graph)

# flow
flow = nx.centrality.current_flow_betweenness_centrality(graph)

# eigen vector
eigen = nx.eigenvector_centrality(graph)

# push to data frame and CSV for pretty printing in report
# Every measure is a separate node -> value mapping, and those mappings are
# not guaranteed to iterate in the same node order.  Look each value up by
# node so that one row always describes one node; rows follow graph.nodes().
node_order = list(graph.nodes())
data = pd.DataFrame([degrees[node] for node in node_order], columns=['degree'])
data['betweenness'] = [betweenness[node] for node in node_order]
data['flow'] = [flow[node] for node in node_order]
data['eigen'] = [eigen[node] for node in node_order]
# The pagerank() of networkx returns a dictionary of the nodes and their pagerank values. In the next cell, I used the heapq library of python to get the top 2 nodes with the highest pagerank values.

# In[13]:

k2 = heapq.nlargest(2, pagerank, key=pagerank.get)
print("The nodes having highest PageRank are:")
for i in k2:
    print("Node:", i)

# #### Dispersion between the two nodes with highest PageRank
# The dispersion of the two nodes can be found by passing them as arguments to the dispersion() function of networkx library.

# In[14]:

# Use the two nodes selected above instead of hard-coded identifiers, so the
# result always refers to the actual top-2 PageRank nodes of G.
# NOTE(review): the original passed u="4037", v="15" literally, presumably the
# top two on the author's dataset; confirm the output is unchanged there.
top_u, top_v = k2
dispersion2 = nx.dispersion(G, u=top_u, v=top_v)
print("The dispersion of the two nodes with the highest pageranks is:", dispersion2)

# ### 8. Top Five nodes with the highest authority score according to HITS

# In[15]:

hubs, authority_score = nx.hits(G, normalized=False)
type(authority_score)

# The hits() function of networkx library returns a tuple of hub values and authority scores. The authority scores are in dictionary format, in the next step, by using heapq library, the top 5 nodes with highest authority scores can be found.

# In[16]:

k3 = heapq.nlargest(5, authority_score, key=authority_score.get)
def test_impossible_things(self):
    """Dispersion is never negative for any node pair of the karate club graph."""
    karate = nx.karate_club_graph()
    all_scores = nx.dispersion(karate)
    for source, targets in all_scores.items():
        for target, score in targets.items():
            assert score >= 0
def dispersion(self):
    """Return ``nx.dispersion`` evaluated on the wrapped graph.

    The call passes only ``self._graph``, so networkx computes the
    dispersion for every node in the graph using its default arguments.
    """
    graph = self._graph
    return nx.dispersion(graph)
def coefficients_digraph(digraph, df_train, df_test):
    """Add per-pair graph coefficients as new columns of both data frames.

    ``data/training.txt`` and ``data/testing.txt`` (resolved under
    ``Setup.path_project(__file__)``) are read line by line.  Each line is
    split on whitespace and its first two tokens are used as a node pair
    ``(x, y)`` of ``digraph``.  One value per line is collected for every
    coefficient, and the resulting lists are assigned to the matching frame
    (training file -> ``df_train``, testing file -> ``df_test``).

    With ``Dx``/``Dy`` the predecessor sets and ``Ax``/``Ay`` the successor
    sets of ``x``/``y``:

    * Dispersion                    ``nx.dispersion(digraph, x, y)``
    * Likelihood A, Deductive       ``|Ax & Dy| / |Ax|``   (0 if Ax is empty)
    * Deductive log                 ``Deductive * log|Ax|`` (0 if Ax is empty)
    * Abductive                     ``|Ax & Ay| / |Ax|``   (0 if Ax is empty)
    * Likelihood D, Inductive       ``|Dx & Dy| / |Dx|``   (0 if Dx is empty)
    * Inductive log                 ``Inductive * log|Dx|`` (0 if Dx is empty)
    * Inference / Inference 2D      ``Deductive + Inductive`` / ``2 * Deductive + Inductive``
    * Inference log / log 2D        same sums on the log variants

    Returns:
        The tuple ``(df_train, df_test)``; both frames are modified in place.
    """
    # Columns in the order they are added to each frame.
    column_names = (
        "Dispersion",
        "Likelihood A",
        "Likelihood D",
        "Deductive",
        "Inductive",
        "Inference",
        "Inference 2D",
        "Deductive log",
        "Inductive log",
        "Inference log",
        "Inference log 2D",
        "Abductive",
    )
    data_files = [
        os.path.join(Setup.path_project(__file__), "data", file_name)
        for file_name in ("training.txt", "testing.txt")
    ]

    for path, frame in zip(data_files, (df_train, df_test)):
        collected = {name: [] for name in column_names}

        with open(path, "r") as handle:
            for row in handle:
                tokens = row.split()
                x, y = tokens[0], tokens[1]

                # The original author calls the predecessor sets "D"
                # (descendants) and the successor sets "A" (ancestors).
                pred_x = set(digraph.predecessors(x))
                succ_x = set(digraph.successors(x))
                pred_y = set(digraph.predecessors(y))
                succ_y = set(digraph.successors(y))

                collected["Dispersion"].append(nx.dispersion(digraph, x, y))

                if succ_x:
                    deductive = len(succ_x & pred_y) / len(succ_x)
                    deductive_log = deductive * np.log(len(succ_x))
                    abductive = len(succ_x & succ_y) / len(succ_x)
                else:
                    deductive = deductive_log = abductive = 0

                if pred_x:
                    inductive = len(pred_x & pred_y) / len(pred_x)
                    inductive_log = inductive * np.log(len(pred_x))
                else:
                    inductive = inductive_log = 0

                # "Likelihood A"/"Likelihood D" carry the same values as
                # "Deductive"/"Inductive".
                collected["Likelihood A"].append(deductive)
                collected["Likelihood D"].append(inductive)
                collected["Deductive"].append(deductive)
                collected["Inductive"].append(inductive)
                collected["Deductive log"].append(deductive_log)
                collected["Inductive log"].append(inductive_log)
                collected["Abductive"].append(abductive)

                collected["Inference"].append(deductive + inductive)
                collected["Inference 2D"].append(2 * deductive + inductive)
                collected["Inference log"].append(deductive_log + inductive_log)
                collected["Inference log 2D"].append(
                    2 * deductive_log + inductive_log)

                # NOTE: second-order variants ("Deductive square",
                # "Inductive square", "Inference QL") were disabled in the
                # original implementation and are not computed.

        for name in column_names:
            frame[name] = collected[name]

    return df_train, df_test
def dispersion(self):
    """Return the result of ``nx.dispersion`` for the graph in ``self._graph``."""
    wrapped_graph = self._graph
    return nx.dispersion(wrapped_graph)
def main():
    """Compute and store network measures for the graph file in ``sys.argv[1]``.

    The file is read as an edge list when its name ends in ``.txt`` (kind 1)
    and as GML when it ends in ``.gml`` (kind 2).  Betweenness, eigenvector,
    PageRank and degree centrality plus the clustering coefficient are each
    passed to ``write_to_file`` under ``<measure>_<kind>``.  Dispersion scores
    are then written for a fixed set of nodes that depends on the file kind,
    and finally degree centrality and clustering are each compared against
    PageRank, betweenness and eigenvector centrality via ``compare_spearmanr``.

    Raises:
        ValueError: if the file name ends in neither ``.txt`` nor ``.gml``.
            (Previously this surfaced later as an unbound-variable error.)
    """
    # inputFile - network
    file_path = sys.argv[1]

    if file_path.endswith(".txt"):
        kind = 1  # read as edge list
    elif file_path.endswith(".gml"):
        kind = 2  # read as gml
    else:
        raise ValueError(
            "Unsupported network file %r: expected a .txt edge list or a "
            ".gml file" % (file_path,))

    # The context manager closes the file even if the reader raises.
    with open(file_path, "rb") as input_file:
        if kind == 1:
            graph = nx.read_edgelist(input_file)
        else:
            graph = nx.read_gml(input_file, relabel=False)

    suffix = str(kind)

    # calculate network centrality for this graph
    print("Computing betweenness centrality")
    between_centrality = nx.betweenness_centrality(graph)
    write_to_file("betweenness_" + suffix, between_centrality)

    print("Computing eigen vector centrality")
    eigen_vector_centrality = nx.eigenvector_centrality(graph, max_iter=100)
    write_to_file("eigenvector_" + suffix, eigen_vector_centrality)

    print("Computing page rank centrality")
    page_rank_centrality = nx.pagerank(graph, alpha=0.85, max_iter=100)
    write_to_file("pagerank_" + suffix, page_rank_centrality)

    print("Computing degree centrality")
    degree_centrality = nx.degree_centrality(graph)
    write_to_file("degree_" + suffix, degree_centrality)

    print("Computing clustering coefficient")
    clustering_coefficient = nx.clustering(graph)
    write_to_file("clustering_" + suffix, clustering_coefficient)

    # calculating dispersion scores
    # (label used in the message and output name, node passed to networkx)
    if kind == 2:
        dispersion_targets = [("Kringel", 20), ("Trigger", 51), ("SN4", 37)]
    else:
        dispersion_targets = [("107", "107"), ("414", "414"), ("698", "698")]
    for label, node in dispersion_targets:
        print("Computing dispersion scores for " + label)
        write_to_file("dispersion_" + label, nx.dispersion(graph, node))

    # comparison
    baselines = [
        ("Degree", degree_centrality),
        ("clustering", clustering_coefficient),
    ]
    others = [
        ("Page Rank", page_rank_centrality),
        ("Betweenness", between_centrality),
        ("Eigen vector", eigen_vector_centrality),
    ]
    for base_label, base_scores in baselines:
        for other_label, other_scores in others:
            print("Comparing %s Centrality and %s Centrality"
                  % (base_label, other_label))
            compare_spearmanr(base_scores, other_scores)
def nodes_dispersion(self, source, target):
    """Return the dispersion between *source* and *target* in ``self.g``.

    *source* is passed to ``nx.dispersion`` as ``u`` and *target* as ``v``.
    """
    endpoints = {"u": source, "v": target}
    return nx.dispersion(self.g, **endpoints)
#parkinson_ancestors = testmapper.get_ancestor_stats(49049000)

#%% Investigate parents
mapper.G.nodes[191690004]
p = list(mapper.G.predecessors(191690004))
mapper.snomed_labels.loc[p]


def get_node(s):
    """Return the entry of ``mapper.G.nodes`` for node *s*."""
    return mapper.G.nodes[s]


get_node(254206003)


def get_parents(s):
    """Return the ``mapper.snomed_labels`` rows of the predecessors of *s*."""
    return mapper.snomed_labels.loc[mapper.G.predecessors(s)]


get_parents(254206003)

#%% Find how many SNOMED codes each ICD code has
import matplotlib.pyplot as plt

# Count rows per 'mapTarget' and sort ascending by 'referencedComponentId'.
icd_map_count = (
    mapper.icd_edges
    .groupby('mapTarget')
    .count()
    .sort_values('referencedComponentId')
)
plt.hist(icd_map_count.referencedComponentId, log=True, bins=50)

#%% Test where weight gets lost
weights_f84.groupby('depth').sum()
weights_f84.groupby('height').sum()

#%% Test dispersion
# Evaluated in both directions.
nx.dispersion(testmapper.G, 110359009, 'G20')
nx.dispersion(testmapper.G, 'G20', 110359009)
def get_dispersion(Edges, G):
    """Return the dispersion of every edge's endpoint pair.

    Args:
        Edges: iterable of edges; each edge must be indexable, with the two
            endpoints at positions 0 and 1 (extra items are ignored).
        G: graph passed through to ``nx.dispersion``.

    Returns:
        A list with ``nx.dispersion(G, u=e[0], v=e[1])`` for each edge ``e``,
        in iteration order.
    """
    # A parenthesized single-argument print is valid both as a Python 2
    # statement and as a Python 3 function call.
    print('Computing edge nodes dispersion')
    # Iterate directly: the former ``Edges[:]`` copy was unnecessary and
    # rejected non-sliceable iterables.
    return [nx.dispersion(G, u=e[0], v=e[1]) for e in Edges]