Example #1
 def test_article(self):
     """our algorithm matches article's"""
     G = small_ego_G()
     disp_uh = nx.dispersion(G, 'u', 'h', normalized=False)
     disp_ub = nx.dispersion(G, 'u', 'b', normalized=False)
     assert disp_uh == 4
     assert disp_ub == 1
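Note: small_ego_G() is a fixture defined elsewhere in the test module, and the "article" in the docstring is presumably Backstrom and Kleinberg's "Romantic Partnerships and the Dispersion of Social Ties", the paper behind nx.dispersion. For intuition, the unnormalized score counts the pairs of common neighbors of u and v that are neither adjacent nor share another neighbor inside u's ego network. A minimal self-contained sketch on an invented toy graph:

import networkx as nx

# u and v share three mutual friends (a, b, c) who neither know each
# other nor have any other mutual contact, so each of the three pairs
# counts toward the raw dispersion of the (u, v) tie.
G = nx.Graph()
G.add_edges_from([("u", "v"), ("u", "a"), ("u", "b"), ("u", "c"),
                  ("v", "a"), ("v", "b"), ("v", "c")])
assert nx.dispersion(G, "u", "v", normalized=False) == 3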
Example #2
 def test_results_length(self):
     """there is a result for every node"""
     G = small_ego_G()
     disp = nx.dispersion(G)
     disp_Gu = nx.dispersion(G, 'u') 
     disp_uv = nx.dispersion(G, 'u', 'h') 
     assert len(disp) == len(G)
     assert len(disp_Gu) == len(G) - 1
     assert type(disp_uv) is float
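As the assertions document, the return shape depends on the call signature: a dict of dicts for the whole graph, a dict over the ego's alters for a single u, and a plain float for a (u, v) pair. A quick illustration on a built-in graph:

import networkx as nx

G = nx.karate_club_graph()
assert isinstance(nx.dispersion(G), dict)        # {ego: {alter: score}}
assert isinstance(nx.dispersion(G, 0), dict)     # {alter: score} for ego 0
assert isinstance(nx.dispersion(G, 0, 1), float) # score for one (u, v) pair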
Example #3
 def test_impossible_things(self):
     G = nx.karate_club_graph()
     disp = nx.dispersion(G)
     for u in disp:
         for v in disp[u]:
             assert disp[u][v] >= 0
Example #4
# get graph measures --------------------------
# (assumes `graph` is an existing networkx graph)
import networkx as nx
import pandas as pd

# order (# vertices) and size (# edges)
order = graph.order()
size = graph.size()

# degree distribution; nx.degree returns a view in networkx >= 2,
# so materialize it as a plain dict of node -> degree
degrees = dict(nx.degree(graph))

# density
density = nx.density(graph)

# diameter
diameter = nx.diameter(graph)

# dispersion
dispersion = nx.dispersion(graph)

# betweenness
betweenness = nx.betweenness_centrality(graph)

# current-flow betweenness (requires a connected graph)
flow = nx.current_flow_betweenness_centrality(graph)

# eigenvector
eigen = nx.eigenvector_centrality(graph)

# push to data frame (and CSV) for pretty printing in report;
# index by node so the per-measure dicts stay aligned
nodes = list(graph.nodes())
data = pd.DataFrame({'degree': [degrees[n] for n in nodes],
                     'betweenness': [betweenness[n] for n in nodes],
                     'flow': [flow[n] for n in nodes],
                     'eigen': [eigen[n] for n in nodes]},
                    index=nodes)
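The comment above mentions a CSV that the excerpt never writes; the missing step is presumably just (file name hypothetical):

data.to_csv('graph_measures.csv', index_label='node')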
Example #5
# networkx's pagerank() returns a dictionary mapping each node to its PageRank value. In the next cell, I use Python's heapq library to get the two nodes with the highest PageRank values.

# In[13]:

k2 = heapq.nlargest(2, pagerank, key=pagerank.get)
print("The nodes having highest PageRank are:")
for i in k2:
    print("Node:", i)

# #### Dispersion between the two nodes with the highest PageRank

# The dispersion of the two nodes can be found by passing them as arguments to the dispersion() function of the networkx library.

# In[14]:

dispersion2 = nx.dispersion(G, u="4037", v="15")
print("The dispersion of the two nodes with the highest pageranks is:",
      dispersion2)

# ### 8. Top Five nodes with the highest authority score according to HITS

# In[15]:

hubs, authority_score = nx.hits(G, normalized=False)
type(authority_score)

# The hits() function of the networkx library returns a tuple of hub values and authority scores. The authority scores come back as a dictionary, so in the next step the heapq library can again be used to find the top 5 nodes with the highest authority scores.

# In[16]:

k3 = heapq.nlargest(5, authority_score, key=authority_score.get)
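# The listing cuts off here; printing the top five would presumably mirror the earlier PageRank cell:

print("The nodes having the highest authority scores are:")
for i in k3:
    print("Node:", i)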
Example #6
 def dispersion(self):
     """
     Calculate the dispersion for every node in the graph
     """
     return nx.dispersion(self._graph)
Example #7
    def coefficients_digraph(digraph, df_train, df_test):
        def intersection(lst1, lst2):
            return set(lst1) & set(lst2)

        filename_testing = os.path.join(Setup.path_project(__file__), "data",
                                        "testing.txt")
        filename_training = os.path.join(Setup.path_project(__file__), "data",
                                         "training.txt")

        for filename, df in zip([filename_training, filename_testing],
                                [df_train, df_test]):
            disp = []  # dispersion
            lh_A = []  # likelihood given A
            lh_D = []  # likelihood given D

            ded = []  # deductive metric
            ind = []  # inductive metric
            inf = []  # inference score
            inf_2d = []  # modified inference

            ded_log = []
            ind_log = []
            inf_log = []
            inf_log_2d = []

            # ded_2 = []
            # ind_2 = []
            # inf_ql = []

            abd = []

            with open(filename, "r") as f:
                for line in f:
                    line = line.split()

                    # In this code's convention, D holds a node's
                    # "descendants" (its predecessors in the digraph)
                    # and A its "ancestors" (its successors); x refers
                    # to line[0] and y to line[1].
                    Dx = set(digraph.predecessors(line[0]))
                    Ax = set(digraph.successors(line[0]))
                    Dy = set(digraph.predecessors(line[1]))
                    Ay = set(digraph.successors(line[1]))

                    # Dx_2 = None
                    # Ax_2 = None
                    # Dy_2 = None
                    # Ay_2 = None
                    # alpha = None

                    AYx = intersection(Ax, Dy)
                    DYx = intersection(Dx, Dy)

                    disp.append(nx.dispersion(digraph, line[0], line[1]))

                    if len(Ax) == 0:
                        lh_A.append(0)
                        ded.append(0)
                        ded_log.append(0)
                        abd.append(0)
                    else:
                        lh_A.append(len(AYx) / len(Ax))
                        ded.append(len(intersection(Ax, Dy)) / len(Ax))
                        ded_log.append(
                            len(intersection(Ax, Dy)) / len(Ax) *
                            np.log(len(Ax)))
                        abd.append(len(intersection(Ax, Ay)) / len(Ax))

                    if len(Dx) == 0:
                        lh_D.append(0)
                        ind.append(0)
                        ind_log.append(0)
                    else:
                        lh_D.append(len(DYx) / len(Dx))
                        ind.append(len(intersection(Dx, Dy)) / len(Dx))
                        ind_log.append(
                            len(intersection(Dx, Dy)) / len(Dx) *
                            np.log(len(Dx)))

                    # if len(Ax_2) == 0:
                    #     ded_2.append(ded[-1])
                    # else:
                    #     ded_2.append( len(intersection(Ax_2, Dy_2))/(len(Ax_2) * alpha) + ded[-1])
                    #
                    # if len(Dx_2) == 0:
                    #     ind_2.append(ind[-1])
                    # else:
                    #     ind_2.append( len(intersection(Dx_2, Dy_2))/(len(Dx_2) * alpha) + ind[-1])

                    inf.append(ded[-1] + ind[-1])
                    inf_2d.append(2 * ded[-1] + ind[-1])
                    inf_log.append(ded_log[-1] + ind_log[-1])
                    inf_log_2d.append(2 * ded_log[-1] + ind_log[-1])
                    # inf_ql.append(ded_2[-1] + ind_2[-1])

            df["Dispersion"] = disp

            df["Likelihood A"] = lh_A
            df["Likelihood D"] = lh_D

            df["Deductive"] = ded
            df["Inductive"] = ind
            df["Inference"] = inf
            df["Inference 2D"] = inf_2d

            df["Deductive log"] = ded_log
            df["Inductive log"] = ind_log
            df["Inference log"] = inf_log
            df["Inference log 2D"] = inf_log_2d

            # df["Deductive square"] = ded_2
            # df["Inductive square"] = ind_2
            # df["Inference QL"] = inf_ql

            df["Abductive"] = abd

        return df_train, df_test
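For intuition about the metrics above: the deductive score of a candidate edge (x, y) is the fraction of x's successors that are also predecessors of y, and the inductive score does the same with x's predecessors. A tiny hand-checkable sketch on an invented digraph:

import networkx as nx

D = nx.DiGraph([("x", "a"), ("x", "b"), ("a", "y"), ("c", "y")])
Ax = set(D.successors("x"))    # {"a", "b"}
Dy = set(D.predecessors("y"))  # {"a", "c"}
assert len(Ax & Dy) / len(Ax) == 0.5  # deductive: only "a" bridges x -> y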
Example #8
 def dispersion(self):
     return nx.dispersion(self._graph)
Example #9
import sys

import networkx as nx


def main():
    # input file: path to the network to analyse (edge list or GML)
    file_path = sys.argv[1]
    input_file = open(file_path, "rb")

    graph_type = 0
    if file_path.endswith(".txt"):
        graph_type = 1
        # read as an edge list
        Graph = nx.read_edgelist(input_file)
    elif file_path.endswith(".gml"):
        graph_type = 2
        # read as GML; label="id" keeps the integer node ids
        # (the relabel=False argument was removed in networkx 2.x)
        Graph = nx.read_gml(input_file, label="id")

    input_file.close()

    # calculate network centrality for this graph
    print("Computing betweenness centrality")
    betweenCentrality = nx.betweenness_centrality(Graph)
    write_to_file("betweenness_" + str(type), betweenCentrality)

    print("Computing eigen vector centrality")
    eigenVectorCentrality = nx.eigenvector_centrality(Graph, max_iter=100)
    write_to_file("eigenvector_" + str(type), eigenVectorCentrality)

    print("Computing page rank centrality")
    pageRankCentrality = nx.pagerank(Graph, alpha=0.85, max_iter=100)
    write_to_file("pagerank_" + str(type), pageRankCentrality)

    print("Computing degree centrality")
    degreeCentrality = nx.degree_centrality(Graph)
    write_to_file("degree_" + str(type), degreeCentrality)

    print("Computing clustering coefficient")
    clusteringCoefficient = nx.clustering(Graph)
    write_to_file("clustering_" + str(type), clusteringCoefficient)

    # calculating dispersion scores
    if graph_type == 2:
        print("Computing dispersion scores for Kringel")
        dispersionScore = nx.dispersion(Graph, 20)
        write_to_file("dispersion_Kringel", dispersionScore)

        print("Computing dispersion scores for Trigger")
        dispersionScore = nx.dispersion(Graph, 51)
        write_to_file("dispersion_Trigger", dispersionScore)

        print("Computing dispersion scores for SN4")
        dispersionScore = nx.dispersion(Graph, 37)
        write_to_file("dispersion_SN4", dispersionScore)
    else:
        print("Computing dispersion scores for 107")
        dispersionScore = nx.dispersion(Graph, "107")
        write_to_file("dispersion_107", dispersionScore)

        print("Computing dispersion scores for 414")
        dispersionScore = nx.dispersion(Graph, "414")
        write_to_file("dispersion_414", dispersionScore)

        print("Computing dispersion scores for 698")
        dispersionScore = nx.dispersion(Graph, "698")
        write_to_file("dispersion_698", dispersionScore)

    # comparison
    print("Comparing Degree Centrality and Page Rank Centrality")
    compare_spearmanr(degreeCentrality, pageRankCentrality)
    print("Comparing Degree Centrality and Betweenness Centrality")
    compare_spearmanr(degreeCentrality, betweenCentrality)
    print("Comparing Degree Centrality and Eigen vector Centrality")
    compare_spearmanr(degreeCentrality, eigenVectorCentrality)
    print("Comparing clustering Centrality and Page Rank Centrality")
    compare_spearmanr(clusteringCoefficient, pageRankCentrality)
    print("Comparing clustering Centrality and Betweenness Centrality")
    compare_spearmanr(clusteringCoefficient, betweenCentrality)
    print("Comparing clustering Centrality and Eigen vector Centrality")
    compare_spearmanr(clusteringCoefficient, eigenVectorCentrality)

    return
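write_to_file and compare_spearmanr are defined elsewhere in the script. Plausible sketches, assuming dict-valued centrality maps (the output format and file naming are guesses):

from scipy import stats

def write_to_file(name, scores):
    # one "node value" pair per line (format assumed)
    with open(name + ".txt", "w") as f:
        for node, value in scores.items():
            f.write("{} {}\n".format(node, value))

def compare_spearmanr(scores_a, scores_b):
    # align both dicts on a common node ordering before correlating
    nodes = sorted(scores_a)
    rho, p = stats.spearmanr([scores_a[n] for n in nodes],
                             [scores_b[n] for n in nodes])
    print("Spearman rho = {:.4f}, p = {:.4g}".format(rho, p))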
Example #10
 def nodes_dispersion(self, source, target):
     return nx.dispersion(self.g, u=source, v=target)
Example #11
#parkinson_ancestors = testmapper.get_ancestor_stats(49049000)

#%% Investigate parents
mapper.G.nodes[191690004]
p = list(mapper.G.predecessors(191690004))
mapper.snomed_labels.loc[p]

get_node = lambda s: mapper.G.nodes[s]
get_node(254206003)

get_parents = lambda s: mapper.snomed_labels.loc[list(mapper.G.predecessors(s))]
get_parents(254206003)

#%% Find how many SNOMED codes each ICD code has
import matplotlib.pyplot as plt

icd_map_count = mapper.icd_edges.groupby('mapTarget').count().sort_values(
    'referencedComponentId')
plt.hist(icd_map_count.referencedComponentId, log=True, bins=50)

#%% Test where weight gets lost

weights_f84.groupby('depth').sum()
weights_f84.groupby('height').sum()

#%%   Test dispersion

nx.dispersion(testmapper.G, 110359009, 'G20')
nx.dispersion(testmapper.G, 'G20', 110359009)
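Note that dispersion is directional: it scores v inside u's ego network, so the two calls above can legitimately disagree. A toy demonstration (invented graph):

import networkx as nx

# w is a mutual contact of a, b and u, but not of v, so the common
# neighbors a and b are "held together" by w only inside u's ego
# network; the raw score therefore differs by direction.
G = nx.Graph([("u", "v"), ("u", "a"), ("u", "b"), ("v", "a"), ("v", "b"),
              ("u", "w"), ("a", "w"), ("b", "w")])
assert nx.dispersion(G, "u", "v", normalized=False) == 0
assert nx.dispersion(G, "v", "u", normalized=False) == 1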
Example #12
def get_dispersion(Edges, G):
    # dispersion score for each candidate edge (u, v)
    print('Computing edge node dispersion')
    return [nx.dispersion(G, u=e[0], v=e[1]) for e in Edges]