Example #1
 def test_hits_numpy(self):
     G = self.G
     h, a = nx.hits_numpy(G)
     for n in G:
         assert almost_equal(h[n], G.h[n], places=4)
     for n in G:
         assert almost_equal(a[n], G.a[n], places=4)
Example #2
 def test_hits_numpy(self):
     G = self.G
     h, a = nx.hits_numpy(G)
     for n in G:
         assert h[n] == pytest.approx(G.h[n], abs=1e-4)
     for n in G:
         assert a[n] == pytest.approx(G.a[n], abs=1e-4)
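The test methods above assume a fixture that stores the expected hub and authority values on the graph itself as G.h and G.a. A minimal self-contained sketch of such a fixture (the graph and the way the expected values are derived here are illustrative assumptions, not the original test setup):

import networkx as nx

class TestHITS:
    @classmethod
    def setup_class(cls):
        # Small directed graph; expected values are derived once from the
        # iterative implementation rather than hard-coded.
        G = nx.DiGraph([(1, 3), (1, 5), (2, 1), (3, 5), (5, 4), (5, 3), (6, 5)])
        G.h, G.a = nx.hits(G)  # attach expected hub/authority dicts
        cls.G = G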
Example #3
 def test_empty(self):
     numpy = pytest.importorskip('numpy')
     G = networkx.Graph()
     assert networkx.hits(G) == ({}, {})
     assert networkx.hits_numpy(G) == ({}, {})
     assert networkx.authority_matrix(G).shape == (0, 0)
     assert networkx.hub_matrix(G).shape == (0, 0)
Example #4
 def test_empty(self):
     G = networkx.Graph()
     assert_equal(networkx.hits(G), ({}, {}))
     assert_equal(networkx.hits_numpy(G), ({}, {}))
     assert_equal(networkx.hits_scipy(G), ({}, {}))
     assert_equal(networkx.authority_matrix(G).shape, (0, 0))
     assert_equal(networkx.hub_matrix(G).shape, (0, 0))
Example #5
def calculate_network_measures(G):
    in_degree = nx.in_degree_centrality(G)
    out_degree = nx.out_degree_centrality(G)
    betweenness = nx.betweenness_centrality(G, weight=WEIGHT)
    closeness = nx.closeness_centrality(G, distance=WEIGHT)
    eigenvector = nx.eigenvector_centrality(G.reverse(), weight=WEIGHT)
    clustering = nx.clustering(G.to_undirected(), weight=WEIGHT)
    pagerank = nx.pagerank(G, weight=WEIGHT)
    hubs, authorities = nx.hits_numpy(G)
    max_clique = node_clique_number(G.to_undirected())

    node_cliques = cliques_containing_node(G.to_undirected())
    node_cliques_count = {}
    for node, cliques in node_cliques.items():
        node_cliques_count[node] = len(cliques)

    network_df = pd.DataFrame(list(G.nodes), columns=[ID])

    network_df[IN_DEGREE] = network_df[ID].map(in_degree)
    network_df[OUT_DEGREE] = network_df[ID].map(out_degree)
    network_df[BETWEENNESS] = network_df[ID].map(betweenness)
    network_df[CLOSENESS] = network_df[ID].map(closeness)
    network_df[EIGENVECTOR] = network_df[ID].map(eigenvector)
    network_df[CLUSTERING] = network_df[ID].map(clustering)
    network_df[PAGERANK] = network_df[ID].map(pagerank)
    network_df[HUBS] = network_df[ID].map(hubs)
    network_df[AUTHORITIES] = network_df[ID].map(authorities)
    network_df[MAX_CLIQUE] = network_df[ID].map(max_clique)
    network_df[CLIQUES_COUNT] = network_df[ID].map(node_cliques_count)

    return network_df
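calculate_network_measures relies on module-level column-name constants and on clique helpers imported from networkx. A hedged setup sketch, assuming a NetworkX version that still ships hits_numpy and cliques_containing_node (both were removed in 3.0); every constant value below is an assumption matching the calls above:

import networkx as nx
import pandas as pd
from networkx.algorithms.clique import node_clique_number, cliques_containing_node

# Column names are placeholders; the original constants are not shown.
ID, WEIGHT = 'id', 'weight'
IN_DEGREE, OUT_DEGREE = 'in_degree', 'out_degree'
BETWEENNESS, CLOSENESS = 'betweenness', 'closeness'
EIGENVECTOR, CLUSTERING = 'eigenvector', 'clustering'
PAGERANK, HUBS, AUTHORITIES = 'pagerank', 'hubs', 'authorities'
MAX_CLIQUE, CLIQUES_COUNT = 'max_clique', 'cliques_count'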
Example #6
 def test_empty(self):
     G = nx.Graph()
     assert nx.hits(G) == ({}, {})
     assert nx.hits_numpy(G) == ({}, {})
     assert _hits_python(G) == ({}, {})
     assert nx.hits_scipy(G) == ({}, {})
     assert nx.authority_matrix(G).shape == (0, 0)
     assert nx.hub_matrix(G).shape == (0, 0)
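_hits_python is a private reference implementation; in NetworkX's own test suite it is imported from the link-analysis module, presumably something like:

from networkx.algorithms.link_analysis.hits_alg import _hits_python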
Example #7
 def test_hits_numpy(self):
     numpy = pytest.importorskip('numpy')
     G = self.G
     h, a = networkx.hits_numpy(G)
     for n in G:
         assert almost_equal(h[n], G.h[n], places=4)
     for n in G:
         assert almost_equal(a[n], G.a[n], places=4)
Example #8
def hits(g):
    (hubs, auths) = nx.hits_numpy(g)
    ranks = dict()
    for x in g.nodes():
        if isinstance(x, int):
            ranks[x] = hubs[x]
        else:
            ranks[x] = auths[x]
    return ranks
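This helper assumes a graph in which integer nodes should be ranked by hub score and all other nodes by authority score. A quick hedged usage sketch (the graph is illustrative):

import networkx as nx

g = nx.DiGraph()
g.add_edges_from([(1, 'a'), (1, 'b'), (2, 'a')])  # int nodes link to str nodes
print(hits(g))  # ints get hub scores, strings get authority scores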
Example #9
 def test_numpy_hits(self):
     G = self.G
     try:
         h, a = networkx.hits_numpy(G, tol=1.e-08)
         for (x, y) in zip(sorted(h), self.G.h):
             assert_almost_equal(x, y, places=5)
         for (x, y) in zip(sorted(a), self.G.a):
             assert_almost_equal(x, y, places=5)
     except ImportError:
         print("Skipping hits_numpy test")
Example #10
 def test_empty(self):
     try:
         import numpy
     except ImportError:
         raise SkipTest('numpy not available.')
     G = networkx.Graph()
     assert_equal(networkx.hits(G), ({}, {}))
     assert_equal(networkx.hits_numpy(G), ({}, {}))
     assert_equal(networkx.authority_matrix(G).shape, (0, 0))
     assert_equal(networkx.hub_matrix(G).shape, (0, 0))
Example #11
 def test_empty(self):
     try:
         import numpy
     except ImportError:
         raise SkipTest('numpy not available.')
     G = networkx.Graph()
     assert_equal(networkx.hits(G), ({}, {}))
     assert_equal(networkx.hits_numpy(G), ({}, {}))
     assert_equal(networkx.authority_matrix(G).shape, (0, 0))
     assert_equal(networkx.hub_matrix(G).shape, (0, 0))
Example #12
def hits(g):
    k, v = nx.hits_numpy(g, True)  # k: hubs, v: authorities (normalized)
    minimo = min(k.items(), key=lambda x: x[1])    # node with the lowest hub score
    massimo = max(k.items(), key=lambda x: x[1])   # node with the highest hub score
    media = sum(k.values()) / len(k)               # mean hub score
    print(minimo, massimo, media)
    minimo_a = min(v.items(), key=lambda x: x[1])  # lowest authority score
    massimo_a = max(v.items(), key=lambda x: x[1]) # highest authority score
    media_a = sum(v.values()) / len(v)             # mean authority score
    print(minimo_a, massimo_a, media_a)
Example #13
 def test_numpy_hits(self):
     G = self.G
     try:
         h, a = networkx.hits_numpy(G, tol=1.e-08)
         for (x, y) in zip(sorted(h), self.G.h):
             assert_almost_equal(x, y, places=5)
         for (x, y) in zip(sorted(a), self.G.a):
             assert_almost_equal(x, y, places=5)
     except ImportError:
         print "Skipping hits_numpy test"
Example #14
    def test_numpy_hits(self):
        G = self.G
        try:
            import numpy
        except ImportError:
            raise SkipTest('numpy not available.')

        h, a = networkx.hits_numpy(G, tol=1.e-08)
        for (x, y) in zip(sorted(h), self.G.h):
            assert_almost_equal(x, y, places=5)
        for (x, y) in zip(sorted(a), self.G.a):
            assert_almost_equal(x, y, places=5)
Example #15
    def test_hits_numpy(self):
        try:
            import numpy as np
        except ImportError:
            raise SkipTest('NumPy not available.')

        G = self.G
        h, a = networkx.hits_numpy(G)
        for n in G:
            assert_almost_equal(h[n], G.h[n], places=4)
        for n in G:
            assert_almost_equal(a[n], G.a[n], places=4)
Example #16
 def test_numpy_hits(self):
     G = self.G
     try:
         import numpy
     except ImportError:
         raise SkipTest('numpy not available.')

     h, a = networkx.hits_numpy(G, tol=1.e-08)
     for (x, y) in zip(sorted(h), self.G.h):
         assert_almost_equal(x, y, places=5)
     for (x, y) in zip(sorted(a), self.G.a):
         assert_almost_equal(x, y, places=5)
Example #17
    def test_hits_numpy(self):
        try:
            import numpy as np
        except ImportError:
            raise SkipTest('NumPy not available.')

        G = self.G
        h, a = networkx.hits_numpy(G)
        for n in G:
            assert_almost_equal(h[n], G.h[n], places=4)
        for n in G:
            assert_almost_equal(a[n], G.a[n], places=4)
Example #18
def mineTrees(rf_model):
    result = pd.DataFrame(index=np.arange(0, rf_model.n_estimators),
                          columns=[
                              'nodes', 'edges', 'diameter', 'weak_components',
                              'strong_components', 'node_connectivity',
                              'mean_hub_score', 'mean_auth_score',
                              'median_degree', 'mean_degree'
                          ])
    for t in range(0, rf_model.n_estimators):
        tree = rf_model.estimators_[t]
        graph = nx.DiGraph()

        # export_graphviz(tree, out_file=str('results/trees/tree') + str(t) + '.dot',
        # feature_names=dataTrain.columns,class_names=data2.Class,rounded=True,
        # proportion=False,precision=2, filled=True)
        left_children = tree.tree_.children_left
        right_children = tree.tree_.children_right
        features = tree.tree_.feature
        for n in range(0, len(left_children)):
            node = features[n]
            l_child = left_children[n]
            r_child = right_children[n]
            if node >= 0:
                if l_child > 0 and features[l_child] >= 0:
                    graph.add_edge(node, features[l_child])
                if r_child > 0 and features[r_child] >= 0:
                    graph.add_edge(node, features[r_child])

        # Network metrics
        hubs, authorities = nx.hits_numpy(graph)
        mean_hub_score = np.mean(list(hubs.values()))
        mean_auth_score = np.mean(list(authorities.values()))
        nodes = nx.number_of_nodes(graph)
        diameter = nx.diameter(nx.to_undirected(graph))
        edges = nx.number_of_edges(graph)
        strong_comp = nx.number_strongly_connected_components(graph)
        weak_comp = nx.number_weakly_connected_components(graph)
        degrees = nx.average_degree_connectivity(graph, target="in")
        avg_in_degree = np.mean(list(degrees))
        median_in_degree = np.median(list(degrees))
        node_connectivity = nx.average_node_connectivity(graph)
        row = [
            nodes, edges, diameter, weak_comp, strong_comp, node_connectivity,
            mean_hub_score, mean_auth_score, median_in_degree, avg_in_degree
        ]

        result.loc[t] = row

    return result
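A hedged usage sketch for mineTrees: fit any small forest and pass it in. The dataset and parameters are arbitrary, module-level imports of nx/np/pd are assumed as in the snippet, and nx.diameter can still raise on a disconnected feature graph:

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

X, y = load_iris(return_X_y=True)
rf = RandomForestClassifier(n_estimators=10, random_state=0).fit(X, y)
print(mineTrees(rf).head())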
Example #19
    def rank(self, return_type='set'):
        # handle degenerate cases first
        graph = self.get_graph()
        if graph is None:
            graph = self.build_graph()

        h, a = nx.hits_numpy(graph)

        imp = a  # rank by authority score

        result = sorted(imp.items(), key=lambda d: d[1], reverse=True)

        if return_type == 'set':
            result = [x for (x, y) in result]
            return result
        else:
            return result
Example #20
def info_network(G):
    from networkx.algorithms import bipartite
    from decimal import Decimal

    print(G.number_of_nodes())
    print(G.number_of_edges())

    print("average_neighbor_degree")
    avg_nbr_deg = nx.average_neighbor_degree(G)  # renamed: 'dict' shadowed the builtin
    print(list(avg_nbr_deg.keys()))
    print(list(avg_nbr_deg.values()))

    print("degree_assortativity_coefficient")
    print(nx.degree_assortativity_coefficient(G))

    print("degree_pearson_correlation_coefficient")
    print(nx.degree_pearson_correlation_coefficient(G))
    # print(nx.k_nearest_neighbors(G))
    print("STOP HERE")

    print("bipartite.closeness_centrality(G, G.nodes)")
    closeness = bipartite.closeness_centrality(G, G.nodes)
    print(list(closeness.values()))

    print("nx.degree_centrality(G)")
    degree_cent = nx.degree_centrality(G)
    print(list(degree_cent.values()))

    print("nx.betweenness_centrality(G)")
    betweenness = nx.betweenness_centrality(G)
    print(list(betweenness.values()))

    print("hits_numpy")
    hits_result = nx.hits_numpy(G)
    print(hits_result)
Example #21
def influence_charts_2013():
    #open bills, make bill graph
    f = open("senate_bills/senate_bills_113.txt", "r")
    bill_list = json.load(f)
    bills = bill_graph(bill_list)
    f.close()

    #influence model metrics

    in_degree_dict = dict(bills.in_degree())
    sorted_in_degree = sorted(in_degree_dict.items(),
                              key=operator.itemgetter(1))
    # this is the order we'll base all of the graphs on
    sorted_senators = [x[0] for x in sorted_in_degree]
    senator_count = len(sorted_senators)

    in_degree_values = [in_degree_dict[x] for x in sorted_senators]

    pagerank_dict = nx.pagerank_numpy(bills)
    pagerank_values = [pagerank_dict[x] for x in sorted_senators]

    hubs, authorities = nx.hits_numpy(bills)
    hubs_values = [hubs[x] for x in sorted_senators]
    authorities_values = [authorities[x] for x in sorted_senators]

    #get the colors we'll need for coloring
    f = open("party_dict.txt", "r")
    party_dict = json.load(f)
    color = [party_dict[x] for x in sorted_senators]

    #visualize in and out degree

    fig = plt.figure(num=None,
                     figsize=(18, 6),
                     dpi=80,
                     facecolor='w',
                     edgecolor='k')
    ax2 = fig.add_subplot(111)

    plt.bar(range(senator_count),
            in_degree_values,
            align='center',
            color=color)
    plt.xticks(range(senator_count), sorted_senators, rotation=90, fontsize=9)
    plt.xlabel("Senators by In-degree")
    plt.ylabel('In-degree')
    plt.show()

    #visualize centrality
    fig = plt.figure(num=None,
                     figsize=(18, 6),
                     dpi=80,
                     facecolor='w',
                     edgecolor='k')
    ax1 = fig.add_subplot(111)

    plt.bar(range(senator_count), pagerank_values, align='center', color=color)
    plt.xticks(range(senator_count), sorted_senators, rotation=90, fontsize=9)
    plt.xlabel("Senators by Pagerank Score")
    plt.ylabel('Pagerank Score')
    plt.show()

    fig = plt.figure(num=None,
                     figsize=(18, 6),
                     dpi=80,
                     facecolor='w',
                     edgecolor='k')
    ax3 = fig.add_subplot(111)

    plt.bar(range(senator_count), hubs_values, align='center', color=color)
    plt.xticks(range(senator_count), sorted_senators, rotation=90, fontsize=9)
    plt.xlabel("Senators by HITS Hub Score")
    plt.ylabel('Hub Score')
    plt.show()

    fig = plt.figure(num=None,
                     figsize=(18, 6),
                     dpi=80,
                     facecolor='w',
                     edgecolor='k')
    ax4 = fig.add_subplot(111)

    plt.bar(range(senator_count),
            authorities_values,
            align='center',
            color=color)
    plt.xticks(range(senator_count), sorted_senators, rotation=90, fontsize=9)
    plt.xlabel("Senators by HITS Authorities Score")
    plt.ylabel('Authorities Score')
    plt.show()

    voter_model_scores = {
        u'Burr, Richard': 60,
        u'Johnson, Tim': 50,
        u'Johanns, Mike': 60,
        u'Begich, Mark': 71,
        u'Inhofe, James M.': 38,
        u'McCain, John': 25,
        u'Portman, Rob': 89,
        u'Rockefeller, John D., IV': 71,
        u'Landrieu, Mary L.': 101,
        u'Heinrich, Martin': 20,
        u'Pryor, Mark L.': 61,
        u'Brown, Sherrod': 81,
        u'Toomey, Pat': 50,
        u'Cardin, Benjamin L.': 101,
        u'Tester, Jon': 91,
        u'Wyden, Ron': 81,
        u'Klobuchar, Amy': 50,
        u'Lee, Mike': 65,
        u'Fischer, Deb': 0,
        u'Bennet, Michael F.': 20,
        u'Blunt, Roy': 71,
        u'Collins, Susan M.': 81,
        u'Schumer, Charles E.': 61,
        u'Harkin, Tom': 91,
        u'McCaskill, Claire': 91,
        u'Lautenberg, Frank R.': 91,
        u'Cruz, Ted': 57,
        u'Schatz, Brian': 19,
        u'Feinstein, Dianne': 90,
        u'Coats, Daniel': 48,
        u'Hagan, Kay': 49,
        u'King, Angus S. Jr.': 10,
        u'Murray, Patty': 50,
        u'Enzi, Michael B.': 49,
        u'Whitehouse, Sheldon': 40,
        u'Reed, Jack': 51,
        u'Ayotte, Kelly': 50,
        u'Levin, Carl': 61,
        u'Kaine, Tim': 19,
        u'Cowan, William M.': 0,
        u'Grassley, Chuck': 61,
        u'Baldwin, Tammy': 30,
        u'Chambliss, Saxby': 9,
        u'Gillibrand, Kirsten E.': 79,
        u'Sanders, Bernard': 60,
        u'Hoeven, John': 48,
        u'Leahy, Patrick J.': 71,
        u'Coons, Christopher A.': 38,
        u'Sessions, Jeff': 0,
        u'Thune, John': 61,
        u'Donnelly, Joe': 19,
        u'Moran, Jerry': 71,
        u'Hirono, Mazie K.': 40,
        u'Manchin, Joe, III': 40,
        u'Shelby, Richard C.': 90,
        u'Menendez, Robert': 71,
        u'Mikulski, Barbara A.': 81,
        u'Alexander, Lamar': 69,
        u'Scott, Tim': 0,
        u'Hatch, Orrin G.': 88,
        u'Cornyn, John': 60,
        u'Booker, Cory A.': 0,
        u'Blumenthal, Richard': 81,
        u'Markey, Edward J.': 0,
        u'Rubio, Marco': 68,
        u'Risch, James E.': 9,
        u'Cochran, Thad': 20,
        u'Franken, Al': 69,
        u'Coburn, Tom': 86,
        u'Kirk, Mark Steven': 69,
        u'Durbin, Richard': 69,
        u'Boozman, John': 48,
        u'Corker, Bob': 9,
        u'Barrasso, John': 59,
        u'Flake, Jeff': 25,
        u'Murphy, Christopher S.': 20,
        u'Stabenow, Debbie': 80,
        u'Johnson, Ron': 49,
        u'Carper, Thomas R.': 61,
        u'Udall, Tom': 40,
        u'Roberts, Pat': 67,
        u'Shaheen, Jeanne': 81,
        u'Vitter, David': 71,
        u'Paul, Rand': 49,
        u'Reid, Harry': 30,
        u'Heller, Dean': 40,
        u'Warren, Elizabeth': 10,
        u'McConnell, Mitch': 0,
        u'Isakson, Johnny': 30,
        u'Baucus, Max': 81,
        u'Casey, Robert P., Jr.': 90,
        u'Graham, Lindsey': 70,
        u'Heitkamp, Heidi': 36,
        u'Udall, Mark': 48,
        u'Murkowski, Lisa': 40,
        u'Cantwell, Maria': 61,
        u'Crapo, Mike': 0,
        u'Warner, Mark R.': 61,
        u'Boxer, Barbara': 70,
        u'Merkley, Jeff': 91,
        u'Nelson, Bill': 100,
        u'Wicker, Roger F.': 20,
        u'Chiesa, Jeff': 0
    }
    voter_model_values = [voter_model_scores[x] for x in sorted_senators]

    fig = plt.figure(num=None,
                     figsize=(18, 6),
                     dpi=80,
                     facecolor='w',
                     edgecolor='k')
    ax5 = fig.add_subplot(111)

    plt.bar(range(senator_count),
            voter_model_values,
            align='center',
            color=color)
    plt.xticks(range(senator_count), sorted_senators, rotation=90, fontsize=9)
    plt.xlabel("Senators by Voter-Model Score")
    plt.ylabel('Voter-Model Score')
    plt.show()
Example #22
def hits_authorities(G):
    """The HITS authorities centralities"""
    return nx.hits_numpy(G)[1]
Example #23
def hits_hubs(G):
    """The HITS hubs centralities"""
    return nx.hits_numpy(G)[0]
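Both one-liners recompute HITS internally, so prefer unpacking nx.hits_numpy once when you need hubs and authorities together. A hedged usage sketch on a small illustrative graph:

import networkx as nx

G = nx.DiGraph([(1, 2), (1, 3), (2, 3)])
print(hits_hubs(G))         # node -> hub score
print(hits_authorities(G))  # node -> authority score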
Example #24
def mineTrees(rf_model, dataset_name, feature_name):
    """
	:param rf_model:    trained random forest
	:return:            dataframe containing network metrics for each estimator in rf_model
	"""
    result = pd.DataFrame(index=np.arange(0, rf_model.n_estimators),
                          columns=[
                              'dataset_name', 'feature_name', 'tree_id',
                              'nodes', 'edges', 'diameter', 'weak_components',
                              'strong_components', 'node_connectivity',
                              'mean_hub_score', 'mean_auth_score',
                              'median_degree', 'mean_degree', 'depth',
                              'total_length_of_branches', 'branching_ratio'
                          ])

    for t in range(0, rf_model.n_estimators):
        # print("Tree " + str(t) + " is processing")
        tree = rf_model.estimators_[t]
        graph = nx.DiGraph()  # Multiple edges are not allowed
        # export_graphviz(tree, out_file=str('results/trees/tree') + str(t) + '.dot',
        # feature_names=dataTrain.columns,class_names=data2.Class,rounded=True,
        # proportion=False,precision=2, filled=True)
        left_children = tree.tree_.children_left
        right_children = tree.tree_.children_right
        features = tree.tree_.feature

        depth = tree.tree_.max_depth
        total_length_of_branches = tree.tree_.node_count - 1
        branching_ratio = depth / total_length_of_branches

        for n in range(0, len(left_children)):

            node = features[n]

            l_child = left_children[n]
            r_child = right_children[n]

            if node >= 0:
                if l_child > 0 and features[l_child] >= 0:
                    # print(str(t) + ">" + str(node) + " l" + str(l_child) + " " + str(features[l_child]))
                    graph.add_edge(node, features[l_child])
                if r_child > 0 and features[r_child] >= 0:
                    # print(str(t) + ">" + str(node) + " r" + str(r_child) + " " + str(features[r_child]))
                    graph.add_edge(
                        node, features[r_child]
                    )  # compare the graph with the original decision tree to make that the graph is correct

        # Network metrics
        with warnings.catch_warnings():  # temporarily suppress warnings
            warnings.filterwarnings('ignore', category=np.ComplexWarning)
            hubs, authorities = nx.hits_numpy(graph)

        mean_hub_score = np.mean(list(
            hubs.values()))  # hub = lots of links from
        mean_auth_score = np.mean(list(
            authorities.values()))  # authority = lots of links to

        nodes = nx.number_of_nodes(graph)
        if nodes == 0:  # empty tree would crash
            warnings.warn(f'Empty decision tree: t={t}', UserWarning)
            result.drop(index=t, inplace=True
                        )  # data would be nan and crash next rf so delete row
            continue

        diameter = nx.diameter(nx.to_undirected(
            graph))  # greatest distance b/w any pair of vertices
        edges = nx.number_of_edges(graph)

        # size of subgraph where all components are connected
        strong_comp = nx.number_strongly_connected_components(
            graph)  # directed
        weak_comp = nx.number_weakly_connected_components(
            graph)  # ignore direction

        degrees = [d for _, d in graph.in_degree()]  # in-degree of every node
        avg_in_degree = np.mean(degrees)
        median_in_degree = np.median(degrees)

        # This line is VERY slow, setting it to dummy value for now
        # node_connectivity = nx.average_node_connectivity(graph)  # how well the graph is connected
        node_connectivity = -1

        row = [
            dataset_name, feature_name, t, nodes, edges, diameter, weak_comp,
            strong_comp, node_connectivity, mean_hub_score, mean_auth_score,
            median_in_degree, avg_in_degree, depth, total_length_of_branches,
            branching_ratio
        ]

        result.loc[t] = row

    return result
Example #25
def influence_charts_2013():
    # open bills, make bill graph
    f = open("senate_bills/senate_bills_113.txt", "r")
    bill_list = json.load(f)
    bills = bill_graph(bill_list)
    f.close()

    # influence model metrics

    in_degree_dict = dict(bills.in_degree())
    sorted_in_degree = sorted(in_degree_dict.items(), key=operator.itemgetter(1))
    # this is the order we'll base all of the graphs on
    sorted_senators = [x[0] for x in sorted_in_degree]
    senator_count = len(sorted_senators)

    in_degree_values = [in_degree_dict[x] for x in sorted_senators]

    pagerank_dict = nx.pagerank_numpy(bills)
    pagerank_values = [pagerank_dict[x] for x in sorted_senators]

    hubs, authorities = nx.hits_numpy(bills)
    hubs_values = [hubs[x] for x in sorted_senators]
    authorities_values = [authorities[x] for x in sorted_senators]

    # get the colors we'll need for coloring
    f = open("party_dict.txt", "r")
    party_dict = json.load(f)
    color = [party_dict[x] for x in sorted_senators]

    # visualize in and out degree

    fig = plt.figure(num=None, figsize=(18, 6), dpi=80, facecolor="w", edgecolor="k")
    ax2 = fig.add_subplot(111)

    plt.bar(range(senator_count), in_degree_values, align="center", color=color)
    plt.xticks(range(senator_count), sorted_senators, rotation=90, fontsize=9)
    plt.xlabel("Senators by In-degree")
    plt.ylabel("In-degree")
    plt.show()

    # visualize centrality
    fig = plt.figure(num=None, figsize=(18, 6), dpi=80, facecolor="w", edgecolor="k")
    ax1 = fig.add_subplot(111)

    plt.bar(range(senator_count), pagerank_values, align="center", color=color)
    plt.xticks(range(senator_count), sorted_senators, rotation=90, fontsize=9)
    plt.xlabel("Senators by Pagerank Score")
    plt.ylabel("Pagerank Score")
    plt.show()

    fig = plt.figure(num=None, figsize=(18, 6), dpi=80, facecolor="w", edgecolor="k")
    ax3 = fig.add_subplot(111)

    plt.bar(range(senator_count), hubs_values, align="center", color=color)
    plt.xticks(range(senator_count), sorted_senators, rotation=90, fontsize=9)
    plt.xlabel("Senators by HITS Hub Score")
    plt.ylabel("Hub Score")
    plt.show()

    fig = plt.figure(num=None, figsize=(18, 6), dpi=80, facecolor="w", edgecolor="k")
    ax4 = fig.add_subplot(111)

    plt.bar(range(senator_count), authorities_values, align="center", color=color)
    plt.xticks(range(senator_count), sorted_senators, rotation=90, fontsize=9)
    plt.xlabel("Senators by HITS Authorities Score")
    plt.ylabel("Authorities Score")
    plt.show()

    voter_model_scores = {
        u"Burr, Richard": 60,
        u"Johnson, Tim": 50,
        u"Johanns, Mike": 60,
        u"Begich, Mark": 71,
        u"Inhofe, James M.": 38,
        u"McCain, John": 25,
        u"Portman, Rob": 89,
        u"Rockefeller, John D., IV": 71,
        u"Landrieu, Mary L.": 101,
        u"Heinrich, Martin": 20,
        u"Pryor, Mark L.": 61,
        u"Brown, Sherrod": 81,
        u"Toomey, Pat": 50,
        u"Cardin, Benjamin L.": 101,
        u"Tester, Jon": 91,
        u"Wyden, Ron": 81,
        u"Klobuchar, Amy": 50,
        u"Lee, Mike": 65,
        u"Fischer, Deb": 0,
        u"Bennet, Michael F.": 20,
        u"Blunt, Roy": 71,
        u"Collins, Susan M.": 81,
        u"Schumer, Charles E.": 61,
        u"Harkin, Tom": 91,
        u"McCaskill, Claire": 91,
        u"Lautenberg, Frank R.": 91,
        u"Cruz, Ted": 57,
        u"Schatz, Brian": 19,
        u"Feinstein, Dianne": 90,
        u"Coats, Daniel": 48,
        u"Hagan, Kay": 49,
        u"King, Angus S. Jr.": 10,
        u"Murray, Patty": 50,
        u"Enzi, Michael B.": 49,
        u"Whitehouse, Sheldon": 40,
        u"Reed, Jack": 51,
        u"Ayotte, Kelly": 50,
        u"Levin, Carl": 61,
        u"Kaine, Tim": 19,
        u"Cowan, William M.": 0,
        u"Grassley, Chuck": 61,
        u"Baldwin, Tammy": 30,
        u"Chambliss, Saxby": 9,
        u"Gillibrand, Kirsten E.": 79,
        u"Sanders, Bernard": 60,
        u"Hoeven, John": 48,
        u"Leahy, Patrick J.": 71,
        u"Coons, Christopher A.": 38,
        u"Sessions, Jeff": 0,
        u"Thune, John": 61,
        u"Donnelly, Joe": 19,
        u"Moran, Jerry": 71,
        u"Hirono, Mazie K.": 40,
        u"Manchin, Joe, III": 40,
        u"Shelby, Richard C.": 90,
        u"Menendez, Robert": 71,
        u"Mikulski, Barbara A.": 81,
        u"Alexander, Lamar": 69,
        u"Scott, Tim": 0,
        u"Hatch, Orrin G.": 88,
        u"Cornyn, John": 60,
        u"Booker, Cory A.": 0,
        u"Blumenthal, Richard": 81,
        u"Markey, Edward J.": 0,
        u"Rubio, Marco": 68,
        u"Risch, James E.": 9,
        u"Cochran, Thad": 20,
        u"Franken, Al": 69,
        u"Coburn, Tom": 86,
        u"Kirk, Mark Steven": 69,
        u"Durbin, Richard": 69,
        u"Boozman, John": 48,
        u"Corker, Bob": 9,
        u"Barrasso, John": 59,
        u"Flake, Jeff": 25,
        u"Murphy, Christopher S.": 20,
        u"Stabenow, Debbie": 80,
        u"Johnson, Ron": 49,
        u"Carper, Thomas R.": 61,
        u"Udall, Tom": 40,
        u"Roberts, Pat": 67,
        u"Shaheen, Jeanne": 81,
        u"Vitter, David": 71,
        u"Paul, Rand": 49,
        u"Reid, Harry": 30,
        u"Heller, Dean": 40,
        u"Warren, Elizabeth": 10,
        u"McConnell, Mitch": 0,
        u"Isakson, Johnny": 30,
        u"Baucus, Max": 81,
        u"Casey, Robert P., Jr.": 90,
        u"Graham, Lindsey": 70,
        u"Heitkamp, Heidi": 36,
        u"Udall, Mark": 48,
        u"Murkowski, Lisa": 40,
        u"Cantwell, Maria": 61,
        u"Crapo, Mike": 0,
        u"Warner, Mark R.": 61,
        u"Boxer, Barbara": 70,
        u"Merkley, Jeff": 91,
        u"Nelson, Bill": 100,
        u"Wicker, Roger F.": 20,
        u"Chiesa, Jeff": 0,
    }
    voter_model_values = [voter_model_scores[x] for x in sorted_senators]

    fig = plt.figure(num=None, figsize=(18, 6), dpi=80, facecolor="w", edgecolor="k")
    ax5 = fig.add_subplot(111)

    plt.bar(range(senator_count), voter_model_values, align="center", color=color)
    plt.xticks(range(senator_count), sorted_senators, rotation=90, fontsize=9)
    plt.xlabel("Senators by Voter-Model Score")
    plt.ylabel("Voter-Model Score")
    plt.show()
Example #26
def network_info(post_id):

    default = [None] * 33
    edge_info, all_miss_perc = get_edges(post_id)

    graph = nx.DiGraph(edge_info)
    all_num_nodes = graph.number_of_nodes()
    all_density = nx.density(graph)
    num_weak_comp = nx.number_weakly_connected_components(graph)
    num_strong_comp = nx.number_strongly_connected_components(graph)

    main_graph = get_main_component(graph)

    if main_graph is None:
        rv = [
            post_id, all_miss_perc, all_num_nodes, all_density, num_weak_comp,
            num_strong_comp
        ] + [None] * 27
        return rv

    main_graph_full, main_miss_perc = add_author_info(main_graph)

    nodes = dict(main_graph_full.nodes(data=True))
    # edges = dict(main_graph_full.edges(data=True))

    #net work structure

    # get rid of post where main graph less than 10
    main_num_nodes = main_graph_full.number_of_nodes()
    if main_num_nodes < 10:
        rv = [
            post_id, all_miss_perc, all_num_nodes, all_density, num_weak_comp,
            num_strong_comp, main_num_nodes
        ] + [None] * 26
        return rv
    else:
        main_density = nx.density(main_graph_full)

        main_trans = nx.transitivity(main_graph_full)
        short_path = nx.average_shortest_path_length(main_graph_full,
                                                     weight='weight')

        eccen = nx.eccentricity(main_graph_full)
        eccen_val_list = list(eccen.values())
        diameter = np.max(eccen_val_list)
        radius = np.min(eccen_val_list)
        eccen_mean = np.mean(eccen_val_list)
        eccen_std = np.std(eccen_val_list)

        periphery = [key for key, value in eccen.items() if value == diameter]
        peri_lkarma, peri_ckarma = get_eccen_avg(nodes, periphery)
        center = [key for key, value in eccen.items() if value == radius]
        center_lkarma, center_ckarma = get_eccen_avg(nodes, center)

        # node importance
        in_degree_dict = dict(nx.in_degree_centrality(main_graph_full))
        in_degree_lkarma, in_degree_ckarma = get_top_karma_avg(
            nodes, in_degree_dict)
        out_degree_dict = dict(nx.out_degree_centrality(main_graph_full))
        out_degree_lkarma, out_degree_ckarma = get_top_karma_avg(
            nodes, out_degree_dict)
        close_dict = dict(nx.closeness_centrality(main_graph_full))
        close_lkarma, close_ckarma = get_top_karma_avg(nodes, close_dict)

        between_dict = dict(
            nx.betweenness_centrality(main_graph_full,
                                      weight='weight',
                                      endpoints=True))
        between_lkarma, between_ckarma = get_top_karma_avg(nodes, between_dict)

        hub_dict, auth_dict = nx.hits_numpy(main_graph_full)
        hub_lkarma, hub_ckarma = get_top_karma_avg(nodes, hub_dict)
        auth_lkarma, auth_ckarma = get_top_karma_avg(nodes, auth_dict)

        try:
            author_id = FULL_DATA.loc[post_id]['author']
            author_lkarma, author_ckarma = AUTHOR.loc[author_id]
        except Exception:
            author_lkarma = None
            author_ckarma = None

        rv = [
            post_id, all_miss_perc, all_num_nodes, all_density, num_weak_comp,
            num_strong_comp, main_miss_perc, main_num_nodes, main_density,
            main_trans, short_path, diameter, radius, eccen_mean, eccen_std,
            peri_lkarma, peri_ckarma, center_lkarma, center_ckarma,
            in_degree_lkarma, in_degree_ckarma, out_degree_lkarma,
            out_degree_ckarma, close_lkarma, close_ckarma, between_lkarma,
            between_ckarma, hub_lkarma, hub_ckarma, auth_lkarma, auth_ckarma,
            author_lkarma, author_ckarma
        ]

        return rv
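network_info leans on project-specific helpers (get_edges, add_author_info, get_eccen_avg, get_top_karma_avg) and globals (FULL_DATA, AUTHOR) that are not shown. One of them is generic enough to sketch; this stand-in for get_main_component is a guess at its contract, not the original:

import networkx as nx

def get_main_component(graph):
    # Hypothetical: return the largest weakly connected component as a
    # copy, or None when the graph is empty.
    if graph.number_of_nodes() == 0:
        return None
    nodes = max(nx.weakly_connected_components(graph), key=len)
    return graph.subgraph(nodes).copy()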
Example #27
    data['out_degree'] = g_mult_dir.out_degree()
    data['degree_centrality'] = {
        n: d / float(g_mult_undir.size())
        for n, d in g_mult_undir.degree().items()
    }
    data['closeness_centrality'] = nx.closeness_centrality(g_mult_undir)
    data['betweenness_centrality'] = nx.betweenness_centrality(g_mult_undir)
    data['eigenvector_centrality'] = nx.eigenvector_centrality_numpy(
        g_single_undir, weight=None)
    data['katz_centrality'] = nx.katz_centrality_numpy(g_single_undir,
                                                       weight=None)

    # B) link analysis
    print('link analysis')
    data['page_rank'] = nx.pagerank_numpy(g_mult_undir, weight=None)
    hits = nx.hits_numpy(g_mult_dir)
    data['hits_hub'] = hits[0]
    data['hits_auth'] = hits[1]

    # C) Reichherzer & Leake 2006
    print('cmaps')
    data['u_weights'] = util.comp_u_weights(g_mult_dir)
    data['l_weights'] = util.comp_l_weights(g_mult_dir)
    # HARD
    p = [0, 2.235, 1.764]
    data['hard'] = {
        n: p[0] * data['hits_hub'][n] + p[1] * data['hits_auth'][n] +
        p[2] * data['u_weights'][n]
        for n in g_mult_dir.nodes()
    }
    # CRD
Example #28
def f_hits_authorities(G,year=False):
  return nx.hits_numpy(G)[1]
Example #29
def f_hits_hubs(G,year=False):
  return nx.hits_numpy(G)[0]
Example #30
 def hits(self):
     return networkx.hits_numpy(self.graph)
Example #31
def hits(net):
    hubs, authorities = nx.hits_numpy(net)
    return distri(hubs.values(), 'hubs') + distri(authorities.values(), 'authorities')
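distri is not shown; the call pattern suggests it turns a score collection into a list of labeled summary statistics. A hypothetical stand-in consistent with the usage above:

import numpy as np

def distri(values, name):
    # Hypothetical: summarize a distribution as (label, value) pairs.
    vals = list(values)
    return [(name + '_mean', np.mean(vals)),
            (name + '_min', np.min(vals)),
            (name + '_max', np.max(vals))]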
Example #32
def get_graph(Mat_D, Threshold, percentageConnections=False, complet=False):
    import scipy.io as sio
    import numpy as np
    import networkx as nx
    import pandas as pd
    import os
    Data = sio.loadmat(Mat_D)
    matX = Data['Correlation']  #[:tamn,:tamn]
    labels = Data['labels']
    print(np.shape(matX))
    print(np.shape(labels))
    print(np.min(matX), np.max(matX))

    if percentageConnections:
        if percentageConnections > 0 and percentageConnections < 1:
            for i in range(-100, 100):
                per = np.sum(matX > i / 100.) / np.size(matX)
                if per <= Threshold:
                    Threshold = i / 100.
                    break
            print(Threshold)
        else:
            print('The coefficient is out of range')

    # Edge list for the graph
    row, col = np.shape(matX)
    e = []
    for i in range(1, row):
        for j in range(i):
            if complet:
                e.append((labels[i], labels[j], matX[i, j]))
            else:
                if matX[i, j] > Threshold:
                    e.append((labels[i], labels[j], matX[i, j]))

    print(np.shape(e)[0], int(((row - 1) * row) / 2))

    # Build the graph
    G = nx.Graph()
    G.add_weighted_edges_from(e)
    labelNew = list(G.nodes)

    # Per-graph metrics (weighted)
    Dpc = nx.degree_pearson_correlation_coefficient(G, weight='weight')
    cluster = nx.average_clustering(G, weight='weight')

    # Unweighted metrics
    estra = nx.estrada_index(G)
    tnsity = nx.transitivity(G)
    conNo = nx.average_node_connectivity(G)
    ac = nx.degree_assortativity_coefficient(G)

    # Per-node metrics
    tam = 15
    BoolCenV = False
    BoolLoad = False
    alpha = 0.1
    beta = 1.0

    katxCN = nx.katz_centrality_numpy(G,
                                      alpha=alpha,
                                      beta=beta,
                                      weight='weight')
    bcen = nx.betweenness_centrality(G, weight='weight')
    av_nd = nx.average_neighbor_degree(G, weight='weight')
    ctr = nx.clustering(G, weight='weight')
    ranPaN = nx.pagerank_numpy(G, weight='weight')
    Gol_N = nx.hits_numpy(G)
    Dgc = nx.degree_centrality(G)
    cl_ce = nx.closeness_centrality(G)
    cluster_Sq = nx.square_clustering(G)
    centr = nx.core_number(G)
    cami = nx.node_clique_number(G)
    camiN = nx.number_of_cliques(G)
    trian = nx.triangles(G)
    colorG = nx.greedy_color(G)
    try:
        cenVNum = nx.eigenvector_centrality_numpy(G, weight='weight')
        tam = tam + 1
        BoolCenV = True
    except TypeError:
        print("The network is too small to compute this parameter")
    except nx.NetworkXPointlessConcept:
        print('NetworkXPointlessConcept: null graph')
    if Threshold > 0:
        carga_cen = nx.load_centrality(G, weight='weight')  # positive weights
        BoolLoad = True
        tam = tam + 1
    #katxC=nx.katz_centrality(G, alpha=alpha, beta=beta, weight='weight')
    #cenV=nx.eigenvector_centrality(G,weight='weight')
    #cenV=nx.eigenvector_centrality(G,weight='weight')
    #Golp=nx.hits(G)
    #Gol_si=nx.hits_scipy(G)
    #ranPa=nx.pagerank(G, weight='weight')
    #ranPaS=nx.pagerank_scipy(G, weight='weight')

    matrix_datos = np.zeros((tam, np.shape(labelNew)[0]))
    tam = 15
    print(np.shape(matrix_datos))
    lim = np.shape(labelNew)[0]
    for i in range(lim):
        roi = labelNew[i]
        #print(roi)
        matrix_datos[0, i] = katxCN[roi]
        matrix_datos[1, i] = bcen[roi]
        matrix_datos[2, i] = av_nd[roi]
        matrix_datos[3, i] = ctr[roi]
        matrix_datos[4, i] = ranPaN[roi]
        matrix_datos[5, i] = Gol_N[0][roi]
        matrix_datos[6, i] = Gol_N[1][roi]
        matrix_datos[7, i] = Dgc[roi]
        matrix_datos[8, i] = cl_ce[roi]
        matrix_datos[9, i] = cluster_Sq[roi]
        matrix_datos[10, i] = centr[roi]
        matrix_datos[11, i] = cami[roi]
        matrix_datos[12, i] = camiN[roi]
        matrix_datos[13, i] = trian[roi]
        matrix_datos[14, i] = colorG[roi]
        if BoolCenV:
            matrix_datos[15, i] = cenVNum[roi]
            tam = tam + 1
        if BoolLoad:
            matrix_datos[16, i] = carga_cen[roi]
            tam = tam + 1
        #matrix_datos[0,i]=katxC[roi]
        #matrix_datos[2,i]=cenV[roi]
        #matrix_datos[7,i]=Golp[0][roi]
        #matrix_datos[9,i]=Gol_si[0][roi]
        #matrix_datos[10,i]=Golp[1][roi]
        #matrix_datos[12,i]=Gol_si[1][roi]
        #matrix_datos[22,i]=ranPa[roi]
        #matrix_datos[24,i]=ranPaS[roi]
    FuncName = [
        'degree_pearson_correlation_coefficient', 'average_clustering',
        'estrada_index', 'transitivity', 'average_node_connectivity',
        'degree_assortativity_coefficient', 'katz_centrality_numpy',
        'betweenness_centrality', 'average_neighbor_degree', 'clustering',
        'pagerank_numpy', 'hits_numpy0', 'hits_numpy1', 'degree_centrality',
        'closeness_centrality', 'square_clustering', 'core_number',
        'node_clique_number', 'number_of_cliques', 'triangles', 'greedy_color',
        'eigenvector_centrality_numpy', 'load_centrality'
    ]
    frame = pd.DataFrame(matrix_datos)
    frame.columns = labelNew
    frame.index = FuncName[6:tam]

    Resul = os.getcwd()
    out_data = Resul + '/graph_metrics.csv'
    out_mat = Resul + '/graph_metrics_global.mat'

    frame.to_csv(out_data)
    sio.savemat(
        out_mat, {
            FuncName[0]: Dpc,
            FuncName[1]: cluster,
            FuncName[2]: estra,
            FuncName[3]: tnsity,
            FuncName[4]: conNo,
            FuncName[5]: ac
        })
    return out_data, out_mat
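A hedged usage sketch for get_graph; the filename is hypothetical, and the .mat file must provide 'Correlation' and 'labels' variables as the loader above expects:

csv_path, mat_path = get_graph('connectivity.mat', Threshold=0.5)
print(csv_path, mat_path)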
Example #33
 def hits(self):
     return networkx.hits_numpy(self.graph)
Example #34
def hits_authorities(G):
    """The HITS authorities centralities"""
    return nx.hits_numpy(G)[1]
Example #35
for u, v, a in G.edges(data=True):
    try:
        x = model.similarity(u, v)
        G[u][v]['weight'] = abs(x)
    except KeyError:
        continue

for u, v, a in G.edges(data=True):
    print(u, v, a)

bw_centrality = nx.betweenness_centrality(G, normalized=True, weight='weight')
d_centrality = nx.degree_centrality(G)
c_centrality = nx.closeness_centrality(G, distance='weight')
pr = nx.pagerank_numpy(G, alpha=0.9, weight='weight')
hub, authority = nx.hits_numpy(G)

avg_bw = 0
avg_d = 0
avg_c = 0
avg_pr = 0
avg_hub = 0
avg_authority = 0

for i in bw_centrality:
    avg_bw += bw_centrality[i]

avg_bw = avg_bw / len(bw_centrality)

for i in d_centrality:
    avg_d += d_centrality[i]
Example #36
def hits_hubs(G):
    """The HITS hubs centralities"""
    return nx.hits_numpy(G)[0]
Example #37
# must be a directed graph

#print(nx.clustering(G,0))
#print nx.average_clustering(G)

# print nx.diameter(G, e=None)
#print nx.center(G, e=None)

#print "nx.max_flow(G, 'Fosl2.48h', 'Hoxa9.48h')"
#print nx.max_flow(G, 'Fosl2.48h', 'Hoxa9.48h')


#print nx.network_simplex(G)

#print nx.pagerank(G,alpha=0.9)

#print "nx.pagerank_numpy"
#print nx.pagerank_numpy(G,alpha=0.9)

print "hits_numpy"
dict5 = nx.hits_numpy(G)

print dict5

# print(nx.shortest_path(G,source=0,target=4))
# print(nx.average_shortest_path_length(G))

#paths = nx.all_simple_paths(G, source=0, target=3, cutoff=2)
#print(list(paths))
csvfile.close()
Example #38
G = session_transition_graph(log)
assert(G.number_of_nodes() == 14457)
assert(G.number_of_edges() == 27315)

"""
Plot graph of user sessions parcours
"""
# pos = nx.spring_layout(G); nx.draw_networkx(G, pos, with_labels=False, node_size=1); plt.show()

print("degree_assortativity_coefficient %2.2f" % nx.degree_assortativity_coefficient(G))
print("degree_pearson_correlation_coefficient %2.2f" % nx.degree_pearson_correlation_coefficient(G))

assert(not nx.is_connected(G))
assert(nx.number_connected_components(G) == 171)

counter = Counter([c.number_of_edges() for c in nx.connected_component_subgraphs(G)])
print(counter)
# Counter({1: 141, 2: 19, 3: 4, 5: 2, 6: 2, 4: 1, 27085: 1, 13: 1})

large_graphs = [c for c in nx.connected_component_subgraphs(G) if c.number_of_edges() > 20]
largest_graph = large_graphs[0]
#nx.write_gexf(largest_graph, './model/user_sessions.gexf')
colors = ['r' if 'label' in d and d['label'] == 1 else 'b' for n, d in largest_graph.nodes_iter(data=True)]
sizes = [50 if 'label' in d and d['label'] == 1 else 1 for n, d in largest_graph.nodes_iter(data=True)]
#pos = nx.spring_layout(largest_graph); nx.draw_networkx(largest_graph, pos, with_labels=False, colors=colors, node_size=sizes); plt.show()

hits = nx.hits_numpy(largest_graph)

print('aa')


Example #39
def get_top_keys(dictionary, top):
    # Sort (key, value) pairs by value, descending, and keep the top entries.
    return sorted(dictionary.items(), reverse=True, key=lambda x: x[1])[:top]


print "Reading in Full Graph."
stdout.flush()
g = read_edgelist('data/wiki-Talk.txt', create_using=DiGraph(), nodetype=int)

print "HITS."
stdout.flush()

hubs, authorities = hits_numpy(g)

file = open("results/hubs_numpy.txt", "w+")
file.write("Top 100 Hubs by HITS\n")
for node in get_top_keys(hubs, 100):
    file.write("{} {}\n".format(node[0], node[1]))
file.close()

file = open("results/authorities_numpy.txt", "w+")
file.write("Top 100 Authorities by HITS\n")
for node in get_top_keys(authorities, 100):
    file.write("{} {}\n".format(node[0], node[1]))
file.close()

print "We Done Here."
file.write("\nTop 20 Nodes by Katz\n")
katz_centrality = nx.katz_centrality_numpy(g)
for node in get_top_keys(katz_centrality, 20):
	file.write("{}, {}\n".format(node[0], node[1]))
file.flush()

print "PageRank."
sys.stdout.flush()
file.write("\nTop 20 Nodes by PageRank\n")
pagerank = nx.pagerank_numpy(g)
for node in get_top_keys(pagerank, 20):
	file.write("{}, {}\n".format(node[0], node[1]))
file.flush()

print "HITS."
sys.stdout.flush()
file.write("\nTop 20 Nodes by HITS: Hubs\n")
hits = nx.hits_numpy(g)
for node in get_top_keys(hits[0], 20):
	file.write("{}, {}\n".format(node[0], node[1]))
file.flush()
file.write("Top 20 Nodes by HITS: Authorities\n")
pagerank = nx.hits_numpy(g)
for node in get_top_keys(hits[1], 20):
	file.write("{}, {}\n".format(node[0], node[1]))

file.close()

print "We Done Here."
sys.stdout.flush()
Example #41
def hits(graph):
    """Return the HITS hub scores of the graph as a list."""
    h, _ = nx.hits_numpy(ensure_connected(graph))
    return list(h.values())
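ensure_connected is not shown; HITS needs a non-degenerate graph, so the helper presumably restricts it to a connected piece. A hypothetical stand-in:

import networkx as nx

def ensure_connected(graph):
    # Hypothetical: keep only the largest (weakly) connected component.
    comps = (nx.weakly_connected_components(graph) if graph.is_directed()
             else nx.connected_components(graph))
    return graph.subgraph(max(comps, key=len)).copy()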
Example #42
#print(nx.clustering(G,0))
#print nx.average_clustering(G)

# print nx.diameter(G, e=None)
#print nx.center(G, e=None)

print "max_flow(G, 'Fosl2.48h', 'Hoxa9.48h')"
print nx.max_flow(G, 'Fosl2.48h', 'Hoxa9.48h')

#print nx.network_simplex(G)

#print nx.pagerank(G,alpha=0.9)

print "pagerank_numpy"
print nx.pagerank_numpy(G,alpha=0.9)

print "hits_numpy"
print nx.hits_numpy(G)

# print(nx.shortest_path(G,source=0,target=4))
# print(nx.average_shortest_path_length(G))

#paths = nx.all_simple_paths(G, source=0, target=3, cutoff=2)
#print(list(paths))



csvfile.close()


Example #43
def hits(net):
    hubs, authorities = nx.hits_numpy(net)
    return distri(hubs.values(), 'hubs') + distri(authorities.values(),
                                                  'authorities')