Example #1
def assortativity(orig_g_M, otherModel_M, name):
	if len(orig_g_M) != 0:
		dorig = pd.DataFrame()
		for g in orig_g_M:
				kcdf = pd.DataFrame.from_dict(nx.average_neighbor_degree(g).items())
				kcdf['k'] = g.degree().values()
				dorig = pd.concat([dorig, kcdf])

		print "orig"
		gb = dorig.groupby(['k'])
		zz = len(gb[1].mean().values)
		sa =	int(math.ceil(zz/75))
		if sa == 0: sa=1
		for x in range(0, len(gb[1].mean().values), sa):
				print "(" + str(gb.mean().index[x]) + ", " + str(gb[1].mean().values[x]) + ")"

	if len(otherModel_M) != 0:
			dorig = pd.DataFrame()
			for g in otherModel_M:
					kcdf = pd.DataFrame.from_dict(nx.average_neighbor_degree(g).items())
					kcdf['k'] = g.degree().values()
					dorig = pd.concat([dorig, kcdf])

			print "the other model ", name
			gb = dorig.groupby(['k'])
			zz = len(gb[1].mean().values)
			sa =	int(math.ceil(zz/75))
			if sa == 0: sa=1
			for x in range(0, len(gb[1].mean().values), sa):
					print "(" + str(gb.mean().index[x]) + ", " + str(gb[1].mean().values[x]) + ")"

	return
Example #3
    def test_degree_p4_weighted(self):
        G = nx.path_graph(4)
        G[1][2]['weight'] = 4
        answer = {0: 2, 1: 1.8, 2: 1.8, 3: 2}
        nd = nx.average_neighbor_degree(G, weight='weight')
        assert_equal(nd, answer)

        D = G.to_directed()
        nd = nx.average_neighbor_degree(D, weight='weight')
        assert_equal(nd, answer)

        D = G.to_directed()
        nd = nx.average_neighbor_degree(D, weight='weight')
        assert_equal(nd, answer)
        nd = nx.average_neighbor_degree(D,
                                        source='out',
                                        target='out',
                                        weight='weight')
        assert_equal(nd, answer)

        D = G.to_directed()
        nd = nx.average_neighbor_degree(D,
                                        source='in',
                                        target='in',
                                        weight='weight')
        assert_equal(nd, answer)
    def test_degree_p4_weighted(self):
        G = nx.path_graph(4)
        G[1][2]["weight"] = 4
        answer = {0: 2, 1: 1.8, 2: 1.8, 3: 2}
        nd = nx.average_neighbor_degree(G, weight="weight")
        assert nd == answer

        D = G.to_directed()
        nd = nx.average_neighbor_degree(D, weight="weight")
        assert nd == answer

        D = G.to_directed()
        nd = nx.average_neighbor_degree(D, weight="weight")
        assert nd == answer
        nd = nx.average_neighbor_degree(D,
                                        source="out",
                                        target="out",
                                        weight="weight")
        assert nd == answer

        D = G.to_directed()
        nd = nx.average_neighbor_degree(D,
                                        source="in",
                                        target="in",
                                        weight="weight")
        assert nd == answer
 def test_degree_barrat(self):
     G = nx.star_graph(5)
     G.add_edges_from([(5, 6), (5, 7), (5, 8), (5, 9)])
     G[0][5]["weight"] = 5
     nd = nx.average_neighbor_degree(G)[5]
     assert nd == 1.8
     nd = nx.average_neighbor_degree(G, weight="weight")[5]
     assert nd == pytest.approx(3.222222, abs=1e-5)
Example #7
 def test_degree_barrat(self):
     G = nx.star_graph(5)
     G.add_edges_from([(5, 6), (5, 7), (5, 8), (5, 9)])
     G[0][5]['weight'] = 5
     nd = nx.average_neighbor_degree(G)[5]
     assert_equal(nd, 1.8)
     nd = nx.average_neighbor_degree(G, weight='weight')[5]
     assert_almost_equal(nd, 3.222222, places=5)
def graph_feature():
    # Create a directed graph
    G = nx.read_edgelist('Cit-HepTh.txt',
                         delimiter='\t',
                         create_using=nx.DiGraph())

    print("Nodes: ", G.number_of_nodes())
    print("Edges: ", G.number_of_edges())

    # Read training data
    train_ids = list()
    y_train = list()
    with open('train.csv', 'r') as f:
        next(f)
        for line in f:
            t = line.split(',')
            train_ids.append(t[0])
            y_train.append(t[1][:-1])

    n_train = len(train_ids)
    unique = np.unique(y_train)
    print("\nNumber of classes: ", unique.size)

    # Create the training matrix. Each row corresponds to an article.
    # Use the following 3 features for each article:
    # (1) out-degree of node
    # (2) in-degree of node
    # (3) average degree of neighborhood of node
    avg_neig_deg = nx.average_neighbor_degree(G, nodes=train_ids)
    X_train = np.zeros((n_train, 3))
    for i in range(n_train):
        X_train[i, 0] = G.out_degree(train_ids[i])
        X_train[i, 1] = G.in_degree(train_ids[i])
        X_train[i, 2] = avg_neig_deg[train_ids[i]]

    # Read test data
    test_ids = list()
    with open('test.csv', 'r') as f:
        next(f)
        for line in f:
            test_ids.append(line[:-2])

    # Create the test matrix. Use the same 3 features as above
    n_test = len(test_ids)
    avg_neig_deg = nx.average_neighbor_degree(G, nodes=test_ids)
    X_test = np.zeros((n_test, 3))
    for i in range(n_test):
        X_test[i, 0] = G.out_degree(test_ids[i])
        X_test[i, 1] = G.in_degree(test_ids[i])
        X_test[i, 2] = avg_neig_deg[test_ids[i]]

    print("\nTrain matrix dimensionality: ", X_train.shape)
    print("Test matrix dimensionality: ", X_test.shape)
    return X_train, y_train, X_test
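
For reference, the three-feature construction described in the comments above can be reproduced on a toy directed graph; a minimal sketch (the graph and node ids below are illustrative, not the Cit-HepTh data):

import networkx as nx
import numpy as np

# toy citation-style digraph; node ids stand in for article ids
G = nx.DiGraph([("a", "b"), ("b", "c"), ("c", "a"), ("a", "c")])
ids = ["a", "b", "c"]

avg_neig_deg = nx.average_neighbor_degree(G, nodes=ids)
X = np.zeros((len(ids), 3))
for i, n in enumerate(ids):
    X[i, 0] = G.out_degree(n)   # (1) out-degree of node
    X[i, 1] = G.in_degree(n)    # (2) in-degree of node
    X[i, 2] = avg_neig_deg[n]   # (3) average degree of the node's neighborhood
print(X)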
Example #9
 def average_neighbor_degree_sum(self):
     if self.average_neighbor_degree_dict is None:
         self.average_neighbor_degree_dict = nx.average_neighbor_degree(
             self.graph, weight="weight")
         time.sleep(1)
     return self.average_neighbor_degree_dict[
         self.node_1] + self.average_neighbor_degree_dict[self.node_2]
def get_nearest_neighbor_degree(network: nx.graph):
    """
    Calculates the average nearest neighbor degree for each node of the given
    network.

    Parameters
    ----------
    network: a NetworkX graph object

    Returns
    -------
    degrees: list-like
        an array of node degrees
    nearest_neighbor_degrees: list-like
        an array of node average nearest neighbor degree in the same order
        as degrees
    """
    degrees = []
    nearest_neighbor_degrees = []

    deg = nx.degree(network)
    nnd = nx.average_neighbor_degree(network)

    for (key, item) in sorted(deg.items(), key=operator.itemgetter(1)):
        degrees.append(item)
        nearest_neighbor_degrees.append(nnd[key])

    return degrees, nearest_neighbor_degrees
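
The core of this helper is pairing each node's degree with its average nearest neighbor degree; a minimal sketch of that pairing on a small graph (written against the current NetworkX API, avoiding the version-specific deg.items() call above):

import networkx as nx

G = nx.star_graph(4)  # hub 0 with four leaves
deg = dict(nx.degree(G))
nnd = nx.average_neighbor_degree(G)
pairs = sorted((deg[n], nnd[n]) for n in G)
print(pairs)  # leaves: degree 1, neighbor degree 4.0; hub: degree 4, neighbor degree 1.0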
Example #11
def get_graphnodefeatures(g):
    for node_id, node_data in g.nodes(data=True):
        node_data["feature"] = [
            g.degree(node_id, weight="weight"),
            nx.average_neighbor_degree(g, nodes=[node_id],
                                       weight="weight")[node_id], 1, 1, 1
        ]
	def create_graph(self):
		g = nx.Graph()
		duplicated_nodes_list = self.only_nodes.iloc[:,0].append(self.only_nodes.iloc[:,1]).reset_index(drop=True)
		nodes_list = duplicated_nodes_list.values.tolist()
		No_duplicate_nodes = set(nodes_list)
		# print(len(No_duplicate_nodes))#327
		g.add_nodes_from(No_duplicate_nodes)
		g.add_edges_from(self.No_duplicate_links)
		# nx.draw(g,node_size = 1.5)#with_labels=True
		# plt.draw()
		# plt.show()
		link_density = nx.density(g)
		# print(link_density)#0.109
		average_degree = nx.average_neighbor_degree(g)
		# numbers = [average_degree[key] for key in average_degree]
		# mean = statistics.mean(numbers)
		# var = statistics.variance(numbers)
		# print(var)
		degree_correlation = nx.degree_pearson_correlation_coefficient(g) 
		# print(degree_correlation)#0.033175769936049336
		average_clustering = nx.average_clustering(g)
		# print(average_clustering)#0.5035048191728447
		average_hopcount = nx.average_shortest_path_length(g)
		# print(average_hopcount)#2.1594341569576554
		diameter = nx.diameter(g)
		# print(diameter)#4
		# A = nx.adjacency_matrix(g)
		A_eigenvalue = nx.adjacency_spectrum(g)
		# print(max(A_eigenvalue))#(41.231605032525835+0j)
		G_eigenvalue = nx.laplacian_spectrum(g)
		# print(sorted(G_eigenvalue))#1.9300488624481513
		return g, nodes_list, No_duplicate_nodes, link_density, average_degree
Example #13
 def test_lattice3(self):
     G = pr.generateGraph("lattice", N=1000, dim=2)
     assert len(G.nodes()) == 961
     assert len(G.edges()) == 1860
     degrees = nx.average_neighbor_degree(G).values()
     ave_degree = reduce(lambda x, y: x + y, degrees) / len(degrees)
     assert int(round(ave_degree)) == 4
def draw_graph(nodes, edges, graphs_dir, default_lang='all'):
    lang_graph = nx.MultiDiGraph()
    lang_graph.add_nodes_from(nodes)
    for edge in edges:
        if edges[edge] == 0:
            lang_graph.add_edge(edge[0], edge[1])
        else:
            lang_graph.add_edge(edge[0], edge[1], weight=float(edges[edge]), label=str(edges[edge]))

    # print graph info in stdout
    # degree centrality
    print('-----------------\n\n')
    print(default_lang)
    print(nx.info(lang_graph))
    try:
        # When ties are associated to some positive aspects such as friendship or collaboration,
        #  indegree is often interpreted as a form of popularity, and outdegree as gregariousness.
        DC = nx.degree_centrality(lang_graph)
        max_dc = max(DC.values())
        max_dc_list = [item for item in DC.items() if item[1] == max_dc]
    except ZeroDivisionError:
        max_dc_list = []
    # https://ru.wikipedia.org/wiki/%D0%9A%D0%BE%D0%BC%D0%BF%D0%BB%D0%B5%D0%BA%D1%81%D0%BD%D1%8B%D0%B5_%D1%81%D0%B5%D1%82%D0%B8
    print('maxdc', str(max_dc_list), sep=': ')
    # assortativity coef
    AC = nx.degree_assortativity_coefficient(lang_graph)
    print('AC', str(AC), sep=': ')
    # connectivity
    print("Слабо-связный граф: ", nx.is_weakly_connected(lang_graph))
    print("количество слабосвязанных компонент: ", nx.number_weakly_connected_components(lang_graph))
    print("Сильно-связный граф: ", nx.is_strongly_connected(lang_graph))
    print("количество сильносвязанных компонент: ", nx.number_strongly_connected_components(lang_graph))
    print("рекурсивные? компоненты: ", nx.number_attracting_components(lang_graph))
    print("число вершинной связности: ", nx.node_connectivity(lang_graph))
    print("число рёберной связности: ", nx.edge_connectivity(lang_graph))
    # other info
    print("average degree connectivity: ", nx.average_degree_connectivity(lang_graph))
    print("average neighbor degree: ", sorted(nx.average_neighbor_degree(lang_graph).items(),
                                              key=itemgetter(1), reverse=True))
    # best for small graphs, and our graphs are pretty small
    print("pagerank: ", sorted(nx.pagerank_numpy(lang_graph).items(), key=itemgetter(1), reverse=True))

    plt.figure(figsize=(16.0, 9.0), dpi=80)
    plt.axis('off')
    pos = graphviz_layout(lang_graph)
    nx.draw_networkx_edges(lang_graph, pos, alpha=0.5, arrows=True)
    nx.draw_networkx(lang_graph, pos, node_size=1000, font_size=12, with_labels=True, node_color='green')
    nx.draw_networkx_edge_labels(lang_graph, pos, edges)

    # saving file to draw it with dot-graphviz
    # changing overall graph view, default is top-bottom
    lang_graph.graph['graph'] = {'rankdir': 'LR'}
    # marking with blue nodes with maximum degree centrality
    for max_dc_node in max_dc_list:
        lang_graph.node[max_dc_node[0]]['fontcolor'] = 'blue'
    write_dot(lang_graph, os.path.join(graphs_dir, default_lang + '_links.dot'))

    # plt.show()
    plt.savefig(os.path.join(graphs_dir, 'python_' + default_lang + '_graph.png'), dpi=100)
    plt.close()
Example #15
 def test_lattice6(self):
     G = pr.generateGraph("lattice", N=6400, dim=3)
     assert len(G.nodes()) == 5832
     assert len(G.edges()) == 16524
     degrees = nx.average_neighbor_degree(G).values()
     ave_degree = reduce(lambda x, y: x + y, degrees) / len(degrees)
     assert int(round(ave_degree)) == 6
Example #16
def get_initial_proj_nodes(G, key):
    """
    Function that takes the graph and returns the nodes that we would like to be
    in the initial projection.
    """
    # a dictionary of the nodes and their degrees
    dict_degrees = dict(G.degree(G.nodes()))
    # a dictionary of the nodes and their average neighbor degrees
    dict_avg_neighbor_deg = nx.average_neighbor_degree(G)
    # sort the dictionary
    sort_degrees = sorted(dict_degrees.items(), key=lambda pw: (pw[1], pw[0]))  # list
    # sort the dictionary
    sort_avg_n_d = sorted(dict_avg_neighbor_deg.items(), key=lambda pw: (pw[1], pw[0]))  # list
    # keep only the top fraction of nodes by degree
    top_deg = sort_degrees[int(key * len(sort_degrees)):len(sort_degrees)]
    # keep only the top fraction of nodes by average neighbor degree
    top_avgn_deg = sort_avg_n_d[int(key * len(sort_avg_n_d)):len(sort_avg_n_d)]
    # choose the nodes that have both maximum degree and maximum average neighbor degree
    tmp_deg = top_deg
    tmp_n_deg = top_avgn_deg
    for i in range(len(top_deg)):
        tmp_deg[i] = list(tmp_deg[i])
        tmp_deg[i][1] = 5
    for i in range(len(top_avgn_deg)):
        tmp_n_deg[i] = list(tmp_n_deg[i])
        tmp_n_deg[i][1] = 10
    # the nodes ranked highest by both measures - the nodes we want to do the projection on
    final_nodes = np.intersect1d(tmp_n_deg, tmp_deg)
    list_final_nodes = list(final_nodes)
    for i in range(len(list_final_nodes)):
        list_final_nodes[i] = str(list_final_nodes[i])
    return list_final_nodes
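
A quick usage sketch for get_initial_proj_nodes (the graph and threshold are illustrative; key is the fraction of low-ranked nodes to discard, so 0.9 keeps roughly the top 10% by degree and by average neighbor degree):

import networkx as nx
import numpy as np

G = nx.erdos_renyi_graph(100, 0.05, seed=42)
initial_nodes = get_initial_proj_nodes(G, 0.9)
print(len(initial_nodes), initial_nodes[:5])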
def analyze_graph(G):    
    #centralities and node metrics
    out_degrees = G.out_degree()
    in_degrees = G.in_degree()
    betweenness = nx.betweenness_centrality(G)
    eigenvector = nx.eigenvector_centrality_numpy(G)
    closeness = nx.closeness_centrality(G)
    pagerank = nx.pagerank(G)
    avg_neighbour_degree = nx.average_neighbor_degree(G)
    redundancy = bipartite.node_redundancy(G)
    load = nx.load_centrality(G)
    hits = nx.hits(G)
    vitality = nx.closeness_vitality(G)
    
    for name in G.nodes():
        G.node[name]['out_degree'] = out_degrees[name]
        G.node[name]['in_degree'] = in_degrees[name]
        G.node[name]['betweenness'] = betweenness[name]
        G.node[name]['eigenvector'] = eigenvector[name]
        G.node[name]['closeness'] = closeness[name]
        G.node[name]['pagerank'] = pagerank[name]
        G.node[name]['avg-neigh-degree'] = avg_neighbour_degree[name]
        G.node[name]['redundancy'] = redundancy[name]
        G.node[name]['load'] = load[name]
        G.node[name]['hits'] = hits[name]
        G.node[name]['vitality'] = vitality[name]
        
    #communities
    partitions = community.best_partition(G)
    for member, c in partitions.items():
        G.node[member]['community'] = c   
    
    return G
def get_nearest_neighbor_degree(network):
    """
    Calculates the average nearest neighbor degree for each node of the given
    network.

    Parameters
    ----------
    network: a NetworkX graph object

    Returns
    -------
    degrees: list-like
        an array of node degrees
    nearest_neighbor_degrees: list-like
        an array of node average nearest neighbor degree in the same order
        as degrees
    """

    degrees = []
    nearest_neighbor_degrees = []

    nodes = network.nodes()
    n_nodes = len(nodes)

    nn_dictionary = nx.average_neighbor_degree(network)

    for n in nodes:
        degrees.append(network.degree(n))
        k_nn = nn_dictionary[n]
        nearest_neighbor_degrees.append(k_nn)

    return degrees, nearest_neighbor_degrees
	def __init__(self, graph, node_1=None, node_2=None):
		self.graph = graph
		self.node_1 = node_1
		self.node_2 = node_2
		self.clustering_dict = nx.clustering(graph)
		self.betweenness_dict = nx.betweenness_centrality(graph)
		self.average_neighbor_degree_dict = nx.average_neighbor_degree(graph)
		
		self.attributes_map = {
			"adamic_adar_similarity": self.adamic_adar_similarity,	
			"average_clustering_coefficient": self.average_clustering_coefficient,	
			"average_neighbor_degree_sum": self.average_neighbor_degree_sum,	
			"betweenness_centrality": self.betweenness_centrality,	
			"closeness_centrality_sum": self.closeness_centrality_sum,	
			"clustering_coefficient_sum": self.clustering_coefficient_sum,	
			"common_neighbors": self.common_neighbors,	
			"cosine": self.cosine,	
			"jaccard_coefficient": self.jaccard_coefficient,	
			"katz_measure": self.katz_measure,	
			"preferential_attachment": self.preferential_attachment,		
			"square_clustering_coefficient_sum": self.square_clustering_coefficient_sum,	
			"sum_of_neighbors": self.sum_of_neighbors,	
			"sum_of_papers": self.sum_of_papers,
			"get_shortest_path_length": self.get_shortest_path_length,
			"get_second_shortest_path_length": self.get_second_shortest_path_length				
		}
		
		if self.node_1 is not None and self.node_2 is not None:
			self.neighbors_1 = self.all_neighbors(self.node_1)
			self.neighbors_2 = self.all_neighbors(self.node_2)
Example #20
 def generate_feature(self):
     mydata = genfromtxt("graph/"+self.filename, delimiter=',')
     
     adjacency = mydata[1:,:]
     
     G = nx.from_numpy_matrix(adjacency, create_using=nx.DiGraph())
     
     # Initialize data set
     data = np.array([[]])
     k = []
     v = []
     #wCC
     wCC_dict = nx.clustering(G)
     wCC_k = ['wCC_'+str(x) for x in list(wCC_dict.keys())]
     wCC_v = list(wCC_dict.values())
     
     #wAND
     wAND_dict = nx.average_neighbor_degree(G)
     wAND_k = ['wAND_'+str(x) for x in list(wAND_dict.keys())]
     wAND_v = list(wAND_dict.values())
     
     #wNBC
     wNBC_dict = nx.betweenness_centrality(G)
     wNBC_k = ['wAND_'+str(x) for x in list(wNBC_dict.keys())]
     wNBC_v = list(wNBC_dict.values())
     
     # Merge
     k = wNBC_k + wAND_k + wCC_k
     v = wNBC_v + wAND_v + wCC_v
     #Insert
     data = np.append(data,[k], axis = 1)
     data = np.append(data,[v], axis = 0)
     return data
Example #21
def get_initial_proj_nodes_by_degrees(G, number):
    """
    Function to decide which nodes would be in the initial embedding by highest degree.
    :param G: Our graph
    :param number: Controls number of nodes in the initial projection
    :return: A list of the nodes that are in the initial projection
    """
    nodes = list(G.nodes())
    # a dictionary of the nodes and their degrees
    dict_degrees = dict(G.degree(G.nodes()))
    # a dictionary of the nodes and their average neighbor degrees
    dict_avg_neighbor_deg = nx.average_neighbor_degree(G)
    # sort the dictionary
    sort_degrees = sorted(dict_degrees.items(), key=lambda pw:
                          (pw[1], pw[0]))  # list
    sort_degrees.reverse()
    new_dict_degrees = {}
    for i in range(len(sort_degrees)):
        new_dict_degrees.update({sort_degrees[i][0]: i})
    sort_avg_n_d = sorted(dict_avg_neighbor_deg.items(),
                          key=lambda pw: (pw[1], pw[0]))  # list
    sort_avg_n_d.reverse()
    new_dict_avg_degrees = {}
    for i in range(len(sort_avg_n_d)):
        new_dict_avg_degrees.update({sort_avg_n_d[i][0]: i})
    new_dict = {}
    for node in nodes:
        new_dict.update(
            {node: new_dict_degrees[node] + new_dict_avg_degrees[node]})
    x = {k: v for k, v in sorted(new_dict.items(), key=lambda item: item[1])}
    initial_nodes = []
    keys = list(x.keys())
    for i in range(number):
        initial_nodes.append(keys[i])
    return initial_nodes
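
A quick usage sketch for get_initial_proj_nodes_by_degrees (illustrative graph; number controls how many nodes seed the initial embedding):

import networkx as nx

G = nx.barabasi_albert_graph(50, 2, seed=7)
seed_nodes = get_initial_proj_nodes_by_degrees(G, 10)
print(seed_nodes)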
def save_metrics_random():

    global density, clustering, assortativity, initial_edges, impact, degree, avg_neigh_degree
    global ranked_degrees, ranked_nodes, degree_final, node_final
    new_edges = list(REDS.edges())
    intersect = [
        filter(lambda x: x in new_edges, sublist) for sublist in initial_edges
    ]
    impact.append(len(new_edges) - len(intersect))
    density.append(round(nx.density(REDS), 2))
    clustering.append(round(nx.average_clustering(REDS), 2))
    assortativity.append(round(nx.degree_assortativity_coefficient(REDS), 2))
    degree.append(REDS.degree(moving))
    avg_neigh_degree.append(
        round(nx.average_neighbor_degree(REDS, nodes=[moving]).values()[0], 2))
    degree_set = []
    node_name = []
    for i in REDS.nodes():
        degree_set.append(REDS.degree(i))
        node_name.append(i)
    degree_final.append(degree_set)
    node_final.append(node_name)
    degree_set, node_name = zip(*sorted(zip(degree_set, node_name)))
    ranked_degrees.append(degree_set)
    ranked_nodes.append(node_name)
Example #23
def sort_adjacency(g, a, attr):
    node_k1 = dict(g.degree())  ## sort by degree
    node_k2 = nx.average_neighbor_degree(g)  ## sort by neighbor degree
    node_closeness = nx.closeness_centrality(g)
    node_betweenness = nx.betweenness_centrality(g)

    node_sorting = list()

    for node_id in g.nodes():
        node_sorting.append(
            (node_id, node_k1[node_id], node_k2[node_id],
             node_closeness[node_id], node_betweenness[node_id]))

    node_descending = sorted(node_sorting,
                             key=lambda x: (x[1], x[2], x[3], x[4]),
                             reverse=True)
    mapping = dict()

    for i, node in enumerate(node_descending):
        mapping[node[0]] = i

        temp = attr[node[0]]  ## switch node attributes according to sorting
        attr[node[0]] = attr[i]
        attr[i] = temp

    a = nx.adjacency_matrix(g, nodelist=mapping.keys()).todense(
    )  ## switch graph node ids according to sorting

    return g, a, attr
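
A usage sketch for sort_adjacency (the per-node attribute list here is hypothetical; it only needs to be indexable by node id, which holds for the 0..N-1 labels of this toy graph):

import networkx as nx

g = nx.karate_club_graph()
attr = [[float(d)] for _, d in g.degree()]  # illustrative one-element attribute vector per node
g, a, attr = sort_adjacency(g, None, attr)
print(a.shape)  # (34, 34), rows and columns reordered by the sorting above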
def get_avg_node_degree(G):
    avg_node_degree = nx.average_neighbor_degree(G)
    avg_node_degree_list = list(dict(avg_node_degree).values())
    name = f'AvgNodeDegree_{G.name}'

    return {
        name: avg_node_degree_list,
    }
Example #26
def average_neighbor_degree(gnx, f, ft):
    start = timer.start(ft, 'average_neighbor_degree')
    average_neighbor_degree_dict = nx.average_neighbor_degree(gnx)
    timer.stop(ft, start)
    for k in average_neighbor_degree_dict:
        f.writelines(
            str(k) + ',' + str(average_neighbor_degree_dict[k]) + '\n')
    return average_neighbor_degree_dict
Example #27
def metrics_report(g: nx.Graph):
    C = nx.average_clustering(g)
    knn = np.mean(np.array(list(dict(nx.average_neighbor_degree(g)).values())))
    k = np.mean(np.array(list(dict(g.degree).values())))
    E = g.number_of_edges()
    N = g.number_of_nodes()
    l = nx.average_shortest_path_length(g)
    return pd.DataFrame(data={'C': C, 'k_nn': knn, 'k': k, 'E': E, 'N': N, 'l': l}, index=[0])
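
A quick check of metrics_report on a small connected graph (assuming the imports the snippet relies on: networkx as nx, numpy as np, pandas as pd):

df = metrics_report(nx.karate_club_graph())
print(df[['N', 'E', 'k', 'k_nn']])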
Example #28
    def test_degree_p4(self):
        G=nx.path_graph(4)
        answer={0:2,1:1.5,2:1.5,3:2}
        nd = nx.average_neighbor_degree(G)
        assert_equal(nd,answer)
        
        D=G.to_directed()
        nd = nx.average_neighbor_degree(D)
        assert_equal(nd,answer)

        D=G.to_directed()
        nd = nx.average_neighbor_out_degree(D)
        assert_equal(nd,answer)

        D=G.to_directed()
        nd = nx.average_neighbor_in_degree(D)
        assert_equal(nd,answer)
    def test_degree_p4(self):
        G = nx.path_graph(4)
        answer = {0: 2, 1: 1.5, 2: 1.5, 3: 2}
        nd = nx.average_neighbor_degree(G)
        assert nd == answer

        D = G.to_directed()
        nd = nx.average_neighbor_degree(D)
        assert nd == answer

        D = G.to_directed()
        nd = nx.average_neighbor_degree(D)
        assert nd == answer

        D = G.to_directed()
        nd = nx.average_neighbor_degree(D, source="in", target="in")
        assert nd == answer
Example #30
    def test_degree_p4(self):
        G = nx.path_graph(4)
        answer = {0: 2, 1: 1.5, 2: 1.5, 3: 2}
        nd = nx.average_neighbor_degree(G)
        assert_equal(nd, answer)

        D = G.to_directed()
        nd = nx.average_neighbor_degree(D)
        assert_equal(nd, answer)

        D = G.to_directed()
        nd = nx.average_neighbor_degree(D)
        assert_equal(nd, answer)

        D = G.to_directed()
        nd = nx.average_neighbor_degree(D, source='in', target='in')
        assert_equal(nd, answer)
    def test_degree_k4(self):
        G = nx.complete_graph(4)
        answer = {0: 3, 1: 3, 2: 3, 3: 3}
        nd = nx.average_neighbor_degree(G)
        assert nd == answer

        D = G.to_directed()
        nd = nx.average_neighbor_degree(D)
        assert nd == answer

        D = G.to_directed()
        nd = nx.average_neighbor_degree(D)
        assert nd == answer

        D = G.to_directed()
        nd = nx.average_neighbor_degree(D, source="in", target="in")
        assert nd == answer
Example #32
def average_neighbor_degree(self,node):
  # same caching technique as in self.clustering_coefficient
  # might also break for very large graphs
  # nx.average_neighbor_degree(self.graph, nodes=node) might be the way to go

  if not hasattr(self, 'all_average_neighbor_degrees'):
    self.all_average_neighbor_degrees = nx.average_neighbor_degree(self.graph)
  return self.all_average_neighbor_degrees[node]
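
The comment above hints at a per-node alternative; a minimal sketch of that variant (it avoids caching the full dict at the cost of recomputing on every call; the helper name is illustrative):

import networkx as nx

def average_neighbor_degree_single(graph, node):
    # nodes expects an iterable of nodes, hence the one-element list
    return nx.average_neighbor_degree(graph, nodes=[node])[node]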
Example #33
    def test_degree_k4(self):
        G = nx.complete_graph(4)
        answer = {0: 3, 1: 3, 2: 3, 3: 3}
        nd = nx.average_neighbor_degree(G)
        assert_equal(nd, answer)

        D = G.to_directed()
        nd = nx.average_neighbor_degree(D)
        assert_equal(nd, answer)

        D = G.to_directed()
        nd = nx.average_neighbor_degree(D)
        assert_equal(nd, answer)

        D = G.to_directed()
        nd = nx.average_neighbor_degree(D, source='in', target='in')
        assert_equal(nd, answer)
Example #34
    def test_degree_k4(self):
        G=nx.complete_graph(4)
        answer={0:3,1:3,2:3,3:3}
        nd = nx.average_neighbor_degree(G)
        assert_equal(nd,answer)
        
        D=G.to_directed()
        nd = nx.average_neighbor_degree(D)
        assert_equal(nd,answer)

        D=G.to_directed()
        nd = nx.average_neighbor_out_degree(D)
        assert_equal(nd,answer)

        D=G.to_directed()
        nd = nx.average_neighbor_in_degree(D)
        assert_equal(nd,answer)
Example #35
def clustering_analys(DF_adj, re_type):
	# Function for testing parameters; re_type selects which value to return
	labels = list(DF_adj.index)
	#print(DF_adj_1,DF_adj)
	#Network graph
	G = nx.Graph()
	G_i = nx.DiGraph()
	G.add_nodes_from(labels)
	G_i.add_nodes_from(labels)
	#Connect nodes
	for i in range(DF_adj.shape[0]):
	    col_label = DF_adj.columns[i]
	    for j in range(DF_adj.shape[1]):
	        row_label = DF_adj.index[j]
	        node = DF_adj.iloc[i,j]
	        if node != 0:
	            #print(node,DF_adj[labels[i]][labels[j]])
	            #print(node)
	            G.add_edge(col_label,row_label,weight = node)
	            G_i.add_edge(col_label,row_label,weight = node)
	if(re_type == 1):
		return dict_avg(nx.clustering(G))  # take the average; works for teams or for players
	elif(re_type == 2):
		L = nx.normalized_laplacian_matrix(G)
		e = np.linalg.eigvals(L.A)
		#print("Largest eigenvalue:", max(e))#衡量什么同行网络
		return max(e)
	elif(re_type == 3):
		return nx.algebraic_connectivity(G)
	elif(re_type == 4):
		return(nx.reciprocity(G_i))
	elif(re_type == 5):
		return(nx.transitivity(G_i))
	elif(re_type == 6):
		return(dict_max(nx.in_degree_centrality(G_i)))
	elif(re_type == 7):
		return(dict_max(nx.out_degree_centrality(G_i)))
	elif(re_type == 8):
		try:
			return(dict_avg(nx.pagerank(G, alpha=0.9)))
		except:
			return(0.01)
	elif(re_type == 9):
		try:
			return(dict_avg(nx.eigenvector_centrality(G)))
		except:
			return(0.25)
	elif(re_type == 10):
		return(dict_avg(nx.average_neighbor_degree(G_i)))
	print("-----------------")
	print(nx.closeness_centrality(G))  # measures star players
	print("-----------------")
	print(nx.pagerank(G, alpha=0.9))  # measures players
	print("-----------------")
	print(nx.eigenvector_centrality(G))  # measures players
	print("-----------------")
	print()  # macroscopic connectivity
	print("-----------------")
Example #36
    def test_degree_p4_weighted(self):
        G = nx.path_graph(4)
        G[1][2]['weight'] = 4
        answer = {0: 2, 1: 1.8, 2: 1.8, 3: 2}
        nd = nx.average_neighbor_degree(G, weighted=True)
        assert_equal(nd, answer)

        D = G.to_directed()
        nd = nx.average_neighbor_degree(D, weighted=True)
        assert_equal(nd, answer)

        D = G.to_directed()
        nd = nx.average_neighbor_out_degree(D, weighted=True)
        assert_equal(nd, answer)

        D = G.to_directed()
        nd = nx.average_neighbor_in_degree(D, weighted=True)
        assert_equal(nd, answer)
Example #37
    def test_degree_p4_weighted(self):
        G=nx.path_graph(4)
        G[1][2]['weight']=4
        answer={0:2,1:1.8,2:1.8,3:2}
        nd = nx.average_neighbor_degree(G,weight='weight')
        assert_equal(nd,answer)
        
        D=G.to_directed()
        nd = nx.average_neighbor_degree(D,weight='weight')
        assert_equal(nd,answer)

        D=G.to_directed()
        nd = nx.average_neighbor_out_degree(D,weight='weight')
        assert_equal(nd,answer)

        D=G.to_directed()
        nd = nx.average_neighbor_in_degree(D,weight='weight')
        assert_equal(nd,answer)
Example #38
def descriptives(G,grouping = None):
    degree = nx.degree_histogram(G)
    plt.bar(x = range(len(degree)), height = degree)
    plt.savefig('images/degree_hist.png')
    plt.close()
    neighbor_degree = nx.average_neighbor_degree(G)
    dict_to_hist(neighbor_degree,'neighbor_degree')
    degree_conn = nx.average_degree_connectivity(G)
    dict_to_hist(degree_conn,'degree_conn')
def get_degree_correlation(g, method = 'average', mode = 'both'):
	""" 
		The average neighbor degree/in-degree/out-degree distribution grouped by degree. Similar to a histogram, it shows the possible degrees k
		and the average/median neighbor degree of nodes with degree k in graph g.

		Parameters:
		-----------
			g: NetworkX Graph
			mode: str, ('in', 'out', 'both'), (default = 'both')
			method: str, ('average', 'median'), (default = 'average')
		Returns:
		--------
			xdata, ydata: a 2-tuple of arrays, (k, <Knn>(k)), where <Knn>(k) denotes the average/median neighbor degree
	"""
	# could be re-implemented with nx.average_degree_connectivity
	if mode == 'both':
		d = g.degree()
		k = nx.average_neighbor_degree(g)
	elif mode == 'in':
		d = g.in_degree()
		k = nx.average_neighbor_degree(g, source = 'in', target = 'in')
	elif mode == 'out':
		d = g.out_degree()
		k = nx.average_neighbor_degree(g, source = 'out', target = 'out')
	else:
		raise NameError("mode must be 'in', 'out', or 'both'")
	ck = defaultdict(list)
	#group the nodes by degree
	for n in g.nodes_iter():
		ck[d[n]].append(k[n])
	xdata, ydata = list(), list()
	if method == 'average':
		for x, y in ifilter(lambda x: x[0] > 0 and average(x[1]) > 0, ck.iteritems()):
			xdata.append(x)
			ydata.append(average(y))
	elif method == 'median':
		for x, y in ifilter(lambda x: x[0] > 0 and median(x[1]) > 0, ck.iteritems()):
			xdata.append(x)
			ydata.append(median(y))
	else:
		raise NameError("method must be 'average' or 'median'")
	xdata = array(xdata)
	ydata = array(ydata)
	return(xdata, ydata)
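
As the inline comment notes, the same degree-grouped average can also be obtained from nx.average_degree_connectivity; a minimal Python 3 sketch of that equivalence for the unweighted method='average', mode='both' case (the graph is illustrative):

import networkx as nx

g = nx.barabasi_albert_graph(200, 3, seed=1)
knn = nx.average_neighbor_degree(g)
grouped = {}
for n, d in g.degree():
    grouped.setdefault(d, []).append(knn[n])
manual = {k: sum(v) / len(v) for k, v in grouped.items()}
builtin = nx.average_degree_connectivity(g)
assert all(abs(manual[k] - builtin[k]) < 1e-9 for k in manual)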
Example #40
 def average_neighbor_degree(self, G, name):
     """
     Analyze the average neighbor degree of graph G (computed on the largest connected subgraph)
     :param G:
     :param name:
     :return:
     """
     if type(G) == nx.DiGraph:  # directed graph: take the largest connected subgraph and sum edge weights
         G = G.subgraph(max(nx.weakly_connected_components(G),
                            key=len))  # largest weakly connected subgraph
         G1 = G.to_undirected(reciprocal=False)  # keep an edge if it exists in either direction
         G2 = G.to_undirected(reciprocal=True)  # keep only edges that are reciprocated
         for edge in G2.edges:
             if edge[0] != edge[1]:
                 G1.add_edge(
                     edge[0],
                     edge[1],
                     weight=G.get_edge_data(edge[0], edge[1])['weight'] +
                     G.get_edge_data(edge[1], edge[0])['weight'])
         G = G1
     else:  # undirected graph: take the largest connected subgraph directly
         G = G.subgraph(max(nx.connected_components(G), key=len))  # largest connected subgraph
         print("%s: number of nodes %s, number of edges %s" % (name, len(G.nodes), len(G.edges)))
     print("%s: 节点个数%s 边个数%s" % (name, len(G.nodes), len(G.edges)))
     avg_avg_neighbor_degree = defaultdict(int)
     avg_avg_weighted_neighbor_degree = defaultdict(float)
     avg_neighbor_degree = nx.average_neighbor_degree(G)
     avg_weighted_neighbor_degree = nx.average_neighbor_degree(
         G, weight="weight")
     for node in G.nodes:
         d = G.degree[node]
         degrees[d] += 1
         avg_avg_neighbor_degree[d] += avg_neighbor_degree[node]
         avg_avg_weighted_neighbor_degree[
             d] += avg_weighted_neighbor_degree[node]
     for d in degrees:
         avg_avg_neighbor_degree[d] /= degrees[d]
         avg_avg_weighted_neighbor_degree[d] /= degrees[d]
     np.save(name + ".neighbor_degree.npy",
             np.array(sorted(avg_avg_neighbor_degree.items())))
     np.save(name + ".weighted_neighbor_degree.npy",
             np.array(sorted(avg_avg_weighted_neighbor_degree.items())))
def describe_graph(G):
    """Graph description"""

    # GRAPH DESCRIPTION
    graph_desc = pd.Series()
    # n. nodes
    graph_desc["number_of_nodes"] = G.number_of_nodes()
    # n. edges
    graph_desc["number_of_edges"] = G.number_of_edges()
    # n. of selfloops
    graph_desc["number_of_selfloops"] = len(G.selfloop_edges())

    # density
    graph_desc["average_shortest_path_length"] = nx.average_shortest_path_length(G)
    # connectivity
    # graph_desc.append(pd.Series(nx.degree_assortativity_coefficient(G), name="degree_assortativity_coefficient"))
    graph_desc["degree_pearson_correlation_coefficient"] = nx.degree_pearson_correlation_coefficient(G)

    # NODE DESCRIPTION
    node_desc = list()
    # n. of neighbours
    node_desc.append(pd.Series(G.degree(), name="degree"))
    node_desc.append(pd.Series(nx.average_neighbor_degree(G), name="average_neighbor_degree"))
    # n. of outgoing
    outgoing = pd.Series(G.in_degree(), name="in_degree")
    node_desc.append(outgoing)
    # n. of incoming
    incoming = pd.Series(G.out_degree(), name="out_degree")
    node_desc.append(incoming)
    # fold change out/in
    ratio = np.log2(outgoing + 1) - np.log2(incoming + 1)
    node_desc.append(pd.Series(ratio, name="out_in_degree_fold_change"))

    # centrality
    # degree based
    node_desc.append(pd.Series(nx.degree_centrality(G), name="degree_centrality"))
    node_desc.append(pd.Series(nx.in_degree_centrality(G), name="in_degree_centrality"))
    node_desc.append(pd.Series(nx.out_degree_centrality(G), name="out_degree_centrality"))
    # closest-path based
    # node_desc.append(pd.Series(nx.closeness_centrality(G), name="closeness_centrality"))
    # node_desc.append(pd.Series(nx.betweenness_centrality(G), name="betweenness_centrality"))
    # # eigenvector-based
    # node_desc.append(pd.Series(nx.eigenvector_centrality(G), name="eigenvector_centrality"))
    # node_desc.append(pd.Series(nx.katz_centrality_numpy(G), name="katz_centrality"))
    # # load-based
    # node_desc.append(pd.Series(nx.load_centrality(G), name="load_centrality"))

    return (graph_desc, pd.DataFrame(node_desc).T)
Example #42
def analyze_graphs(graphs, days):
    undirected_graphs = list(map(lambda G: G.to_undirected(), graphs))
    graph_days = dict(zip(undirected_graphs, days))

    connected_graphs = list(filter(lambda G: nx.is_connected(G), undirected_graphs))
    connected_days = dict(zip(connected_graphs, list(map(
        lambda G: graph_days[G], connected_graphs))))

    metrics = {
        #"average_shortest_path_lengths": [lambda G: nx.average_shortest_path_length(G), connected_graphs, connected_days],
        "clustering": [lambda G: nx.average_clustering(G), undirected_graphs, graph_days],
        "average_neighbor_degree": [lambda G: nx.average_neighbor_degree(G), graphs, graph_days],
        "min_weighted_vertex_cover": [lambda G: len(min_weighted_vertex_cover(G)), undirected_graphs, graph_days],
        #"eccentricity": [lambda G: np.mean(nx.eccentricity(G).values()), connected_graphs, connected_days],
        #"diameter": [lambda G: nx.diameter(G), connected_graphs, connected_days],
        #"periphery": [lambda G: len(nx.periphery(G)), connected_graphs, connected_days],
        "degree_centralities": [lambda G: np.mean(nx.degree_centrality(G).values()), graphs, graph_days],
        "in_degree_centralities": [lambda G: np.mean(nx.in_degree_centrality(G).values()), graphs, graph_days],
        "out_degree_centralities": [lambda G: np.mean(nx.out_degree_centrality(G).values()), graphs, graph_days],
        "closeness_centralities": [lambda G: np.mean(nx.closeness_centrality(G).values()), graphs, graph_days],
        "betweenness_centralities": [lambda G: np.mean(nx.betweenness_centrality(G).values()), graphs, graph_days]
    }

    for metric in metrics:
        print("Analyzing {}...".format(metric))

        function = metrics[metric][0]
        which_graphs = metrics[metric][1]
        which_days = metrics[metric][2].values()
        yArray = list(map(function, which_graphs))

        print(which_days)
        print(yArray)

        plt.plot(which_days, yArray)
        plt.xlabel("Day")
        plt.ylabel(metric)
        plt.title("{} Over Time".format(metric))

        plt.savefig("{}_VS_Time.png".format(metric))
        plt.close()
def node_analysis(G, rule):
    if rule == 'degree':
        return nx.degree(G)
    elif rule == 'clustering':
        return nx.clustering(G)
    elif rule == 'closeness' or rule == 'centrality':
        return nx.closeness_centrality(G)
    elif rule == 'betweeness':
        return nx.betweenness_centrality(G)
    elif rule == 'average neighbor degree':
        return nx.average_neighbor_degree(G)
    elif rule == 'component':
        comp = nx.connected_components(G)
        components = {}
        for i,c in enumerate(comp):
            for node in c:
                components[node] = i
        return components
    else:
        print "Node assignment rule {0} not recognized.".format(rule)
        sys.exit()
Example #44
def info_network(G):
    from networkx.algorithms import bipartite
    from decimal import Decimal

    print G.number_of_nodes()
    print G.number_of_edges()

    print "average_neighbor_degree"
    dict = nx.average_neighbor_degree(G)
    list1 = dict.keys()
    list2 = dict.values()
    print list1
    print list2

    print "degree_assortativity_coefficient"
    print nx.degree_assortativity_coefficient(G)

    print "degree_pearson_correlation_coefficient"
    print nx.degree_pearson_correlation_coefficient(G)
    # print nx.k_nearest_neighbors(G)
    print "STOP HERE"

    print "bipartite.closeness_centrality(G,G.node)"
    dict2 = bipartite.closeness_centrality(G, G.node)
    list3 = dict2.values()
    print list3

    print "nx.degree_centrality(G)"
    dict3 = nx.degree_centrality(G)
    list4 = dict3.values()
    print list4

    print "nx.betweenness_centrality(G)"
    dict4 = nx.betweenness_centrality(G)
    list5 = dict4.values()
    print list5

    print "hits_numpy"
    dict5 = nx.hits_numpy(G)
    print dict5
def save_metrics():

	global density,clustering,assortativity,initial_edges,impact,degree, avg_neigh_degree, initial_edges
	global ranked_nodes, ranked_degrees,degree_final,node_final, moving
	new_edges=list(RGG.edges())
	intersect = [filter(lambda x: x in new_edges, sublist) for sublist in initial_edges]
	impact.append(len(new_edges)-len(intersect))
	density.append(round(nx.density(RGG),2))
	clustering.append(round(nx.average_clustering(RGG),2))
	assortativity.append(round(nx.degree_assortativity_coefficient(RGG),2))
	degree.append(RGG.degree(moving))
	avg_neigh_degree.append(round(nx.average_neighbor_degree(RGG,nodes=[moving]).values()[0],2))
	degree_set=[]
	node_name=[]
	for i in RGG.nodes():
		degree_set.append(RGG.degree(i))
		node_name.append(i)
	degree_final.append(degree_set)
	node_final.append(node_name)
	degree_set,node_name=zip(*sorted(zip(degree_set,node_name)))
	ranked_degrees.append(degree_set)
	ranked_nodes.append(node_name)
 def __init__(self, graph, feature_list=[]):
     self.no_feature = 39
     self.G = graph
     self.nodes = nx.number_of_nodes(self.G)
     self.edges = nx.number_of_edges(self.G)
     self.Lap = nx.normalized_laplacian_matrix(self.G)
     # ??? how to check whether comparable, addable?
     self.eigvals = numpy.linalg.eigvals(self.Lap.A).tolist()
     try:
         self.radius = nx.radius(self.G)
     except nx.exception.NetworkXError:
         self.radius = "ND"
     try:
         self.ecc_dic = nx.eccentricity(self.G)
     except nx.exception.NetworkXError:
         self.ecc_dic = {}
     self.degree_dic = nx.average_neighbor_degree(self.G)
     self.pagerank = nx.pagerank(self.G).values()
     if feature_list == []:
         self.feature_list = list(range(1, self.no_feature + 1))
     else:
         self.feature_list = feature_list
     self.feature_vector = []
     self.feature_time = []
Example #47
def upload_file(request):
    f = request.FILES['ds']

    # little bit of hacking...
    format_type = 'json'
    try:
        ds = json.load(f)
    except:
        rows = csv.reader(f)
        ds = list()
        names = list()
        for row in rows:
            if len(names) == 0:
                for v in row:
                    names.append(v)
            else:
                idx = 0
                cur = dict()
                for v in row:
                    cur[names[idx]] = v
                    idx += 1
                ds.append(cur)
        format_type = 'csv'

    # Create network
    G = nx.Graph()

    # create date-centered / random id. not guaranteed to be unique. TODO change for scale.
    now = datetime.datetime.now()
    ds_id = "%d%d%d%d%d%d.%d" % (now.year, now.month, now.day, now.hour, now.minute, now.second, random.randint(0, 100000))

    # known formats.
    # based on collab2008.json
    if (type(ds) == type(dict()) and 
        'links' in ds and
        'nodes' in ds and
        len(ds['links']) > 0 and 
        len(ds['nodes']) > 0):
        idx = 0
        for node in ds['nodes']:
            G.add_node(idx, country_code=node['id'])
            idx += 1
        for link in ds['links']:
            G.add_edge(link['source'], link['target'], weight=link['weight'])

    # based on elena's airbnb data, formatted as csv, with columns:
    # ego_name, ego_lat, ego_lng, alter_name, alter_lat, alter_lng, weight
    elif (type(ds) == type(list()) and
          type(ds[0]) == type(dict()) and
          'ego_name' in ds[0] and
          'alter_name' in ds[0]):
        node_names = set()
        name_to_ll = dict()
        for d in ds:
            node_names.add(d['ego_name'])
            node_names.add(d['alter_name'])
            name_to_ll[d['ego_name']] = {
                'lat': d['ego_lat'],
                'lng': d['ego_lng']}
            name_to_ll[d['alter_name']] = {
                'lat': d['alter_lat'],
                'lng': d['alter_lng']}

        nodemap = dict()
        idx = 0
        for node_name in node_names:
            if node_name not in nodemap:
                G.add_node(idx, name=node_name, lat=name_to_ll[node_name]['lat'], lng=name_to_ll[node_name]['lng'])
                nodemap[node_name] = idx
                idx += 1

        for d in ds:
            G.add_edge(nodemap[d['ego_name']], nodemap[d['alter_name']], weight=int(d['weight']))
    else:
        return "ERROR_UNKNOWN_FORMAT"

    # Make sure that *every node* has a lat/lng
    no_geo = [] # maintain list of nodes removed
    ccode_to_ll = pickle.load(open('DATASETS/code_to_latlng.pkl', 'r'))
    for idx in G.node:
        # todo use HttpResponseBadRequest if no lat/lng exists
        if 'lat' not in G.node[idx] or 'lng' not in G.node[idx]:
            if 'country_code' in G.node[idx] and G.node[idx]['country_code'] in ccode_to_ll:
                c = G.node[idx]['country_code']
                G.node[idx]['lat'] = ccode_to_ll[c]['lat']
                G.node[idx]['lng'] = ccode_to_ll[c]['lng']
            else:
                no_geo.append(idx)

    # remove nodes with missing geo info
    for idx in no_geo:
        G.remove_node(idx)

    # Add *EXTRA* data. Not always guaranteed to be returned.
    ctor = pickle.load(open('DATASETS/country_to_continent.pkl', 'r'))
    code_to_country = pickle.load(open('DATASETS/code_to_country.pkl', 'r'))

    pp = pprint.PrettyPrinter(stream=sys.stderr)
    #pp.pprint(G.nodes(data=True))
    #pp.pprint(G.edges(data=True))

    closeness_vitality = nx.closeness_vitality(G)
    pagerank = nx.pagerank(G)
    degree_centrality = nx.degree_centrality(G)
    average_neighbor_degree = nx.average_neighbor_degree(G)
    for idx in G.node:
        if 'country_code' in G.node[idx] and G.node[idx]['country_code'] in ctor:
            G.node[idx]['region'] = ctor[G.node[idx]['country_code']]
        else:
            G.node[idx]['region'] = 'Unknown'

        if 'country_code' in G.node[idx] and G.node[idx]['country_code'] in code_to_country:
            G.node[idx]['country_name'] = code_to_country[G.node[idx]['country_code']]
        else:
            G.node[idx]['country_name'] = 'Unknown'

        G.node[idx]['closeness_vitality'] = closeness_vitality[idx]
        G.node[idx]['pagerank'] = pagerank[idx]
        G.node[idx]['degree'] = G.degree(idx)
        G.node[idx]['degree_centrality'] = degree_centrality[idx]
        G.node[idx]['average_neighbor_degree'] = average_neighbor_degree[idx]
        G.node[idx]['weight'] = G.degree(idx, 'weight')

        name = "Location: %.2f,%.2f" % (float(G.node[idx]['lat']), float(G.node[idx]['lng']))
        if 'name' in G.node[idx]:
            name += " (%s)" % G.node[idx]['name']
        elif 'country_name' in G.node[idx] and G.node[idx]['country_name'] != "Unknown":
            name += " (%s)" % G.node[idx]['country_name']
        G.node[idx]['name'] = name

    f = open("DATASETS/graph%s.pickle" % ds_id, 'w')
    pickle.dump(G, f)

    print >>sys.stderr, "UPLOAD COMPLETE. %d NODES IGNORED DUE TO MISSING GEO DATA." % len(no_geo)

    return ds_id
def parallelProperties(name):
	print name
	#creating multi directed graph
	MG=nx.MultiGraph()
	
	#reading file and adding nodes - edges
	file=None
	if pathToFiles!=None:
		file=open(pathToFiles+"/"+name,"r")
	else:
		file=open("./"+name,"r")
	listOfInteractions=[] #i will save interactions to rebuild the directed digraph
	for line in file:
		splittedLine=line.split("\t")
		node1=splittedLine[0]
		node2=splittedLine[1]
		listOfInteractions.append(node1+":"+node2)
		MG.add_edge(node1, node2)

	file.close()
	#####################################
	##
	## dict to save measures
	##
	#####################################
	dictProp={}
	for node in MG.nodes():
		dictProp[node]={"average_shortest_path_length":'', "clustering_coefficient":'0',"closeness_centrality":'', "eccentricity":'',"stress":'0',"edge_count":'',"In_degree":'0',"Out_degree":'0',"Betweenness_centrality":'', "Neighborhood_conectivity":''}

	file=None
	if pathToFiles!=None:
		file=open(pathToFiles+"/"+name,"r")
	else:
		file=open("./"+name,"r")
	####################################################################
	##
	## for in degree and out degree
	##
	#################################################################### 
	for line in file:
		splittedLine=line.split("\t")
		node1=splittedLine[0]
		node2=splittedLine[1]
		dictProp[node1]["Out_degree"]=str(int(dictProp[node1]["Out_degree"])+1)
		dictProp[node2]["In_degree"]=str(int(dictProp[node2]["In_degree"])+1)
		
	file.close()			
				
	#we will see subgraphs
	subGS=list(nx.connected_component_subgraphs(MG))
	#now we will rebuild these graphs as multidigraphs
	for subG in list(nx.connected_component_subgraphs(MG)):
		#first step: create a multidigraph
		md=nx.MultiDiGraph()
		whitoutSL=nx.MultiGraph() #a graph without selfloops
		directed=nx.DiGraph()
		MDNoSelfLoop=nx.MultiDiGraph() #a graph without selfloops
		#the second step is to loop over the edges, searching for the direction of interaction
		for edge in nx.edges(subG):
			nodeX, nodeY=edge

			#if is a self interaction
			if nodeX==nodeY:
				md.add_edge(nodeX,nodeY)
				directed.add_edge(nodeX,nodeY)
			else:
				#if is not a self interaction I will look for the directions (if exist A:B and/or B:A) and Ill add the edge
				cont=0
				if nodeX+":"+nodeY in listOfInteractions:
					md.add_edge(nodeX,nodeY)
					directed.add_edge(nodeX,nodeY)
					whitoutSL.add_edge(nodeX,nodeY)
					MDNoSelfLoop.add_edge(nodeX,nodeY)
				if nodeY+":"+nodeX in listOfInteractions:
					md.add_edge(nodeY,nodeX)
					whitoutSL.add_edge(nodeY,nodeX)
					directed.add_edge(nodeY,nodeX)
					MDNoSelfLoop.add_edge(nodeY,nodeX)
					
					
		####################################################################
		##
		##							Metrics
		##
		####################################################################				

		for node in md.nodes():
			####################################################################
			##
			##					Edge count
			##
			####################################################################
			dictProp[node]["edge_count"]=str(int(dictProp[node]["Out_degree"])+int(dictProp[node]["In_degree"]))
		
			####################################################################
			##
			##					average shortest path length			
			##
			####################################################################
			
			# at this point we have directed subgraphs, so now it is time to compute the average shortest path of each subgraph
	
			#first we will compute shortest path of one node, then we will compute average shortest path length
			shortestPaths=nx.shortest_path_length(md, source=node)
			
			summatory=0
			cont=0
			for item in shortestPaths.items():
				summatory+=float(item[1])
				cont+=1
			if (cont-1)!=0:
				dictProp[node]["average_shortest_path_length"]=str(summatory/(cont-1))
				#print node,(summatory/(cont-1))
			else:
				dictProp[node]["average_shortest_path_length"]="0"
			####################################################################
			##
			##					eccentricity			
			##
			####################################################################
			higher=0
			for paths in shortestPaths.items():
				if int(paths[1])>higher:
					higher=int(paths[1])
			dictProp[node]["eccentricity"]=str(higher)
							

		
		####################################################################
		##
		##					closeness centrality			
		##
		####################################################################

		for item in (nx.closeness_centrality(md, normalized=False)).items():
			dictProp[item[0]]["closeness_centrality"]=str(item[1])		
			
		####################################################################
		##
		##					neighborhood connectivity			
		##
		####################################################################					
		
		for item in (nx.average_neighbor_degree(whitoutSL)).items():
			dictProp[item[0]]["Neighborhood_conectivity"]=str(item[1])

		####################################################################
		##
		##					stress centrality		
		##
		####################################################################		
		for Source in md.nodes():
			for Target in md.nodes():
				if Source!=Target:
					try:
						for path in nx.all_shortest_paths(md,source=Source,target=Target):
							if len(path)>2:
								for N in path[1:-1]:
									dictProp[N]["stress"]=str(int(dictProp[N]["stress"])+1)
					except:
						pass
					
		####################################################################
		##
		##					betweenness centrality		
		##
		####################################################################		
		for item in (nx.betweenness_centrality(md)).items():
			dictProp[item[0]]["Betweenness_centrality"]=str(item[1])
		 
		
		####################################################################
		##
		##					clustering coefficient			
		##
		####################################################################
		
		for node in MDNoSelfLoop.nodes():
			inPlusOut=float(dictProp[node]["Out_degree"])+float(dictProp[node]["In_degree"])
			division=(len(whitoutSL.neighbors(node))*(len(whitoutSL.neighbors(node))-1))	
			if len(whitoutSL.neighbors(node))>1: #if node has at least two neighbour
				connectedNeighbors=0
				neighbors=whitoutSL.neighbors(node)
				for neighbor in neighbors:
					#print neighbor
					neighborsOfNeighbors=MDNoSelfLoop.neighbors(neighbor)
					#print neighbor, neighborsOfNeighbors
					for n in neighborsOfNeighbors:
						#print n
						if n in neighbors:
							connectedNeighbors+=1
				dictProp[node]["clustering_coefficient"]=str(float(connectedNeighbors)/division)

	outFile=None
	if Result!=None:
		outFile=open(Result+"/"+name[:-4]+".csv","w")
	else:
		outFile=open("./"+name[:-4]+".csv","w")
		
	outFile.write("\"AverageShortestPathLength\",\"BetweennessCentrality\",\"ClosenessCentrality\",\"ClusteringCoefficient\",\"Eccentricity\",\"EdgeCount\",\"Indegree\",\"name\",\"NeighborhoodConnectivity\",\"Outdegree\",\"Stress\"\n")
	for item in dictProp.items():
		node=item[0]
		outFile.write("\""+dictProp[node]["average_shortest_path_length"]+"\",\""+dictProp[node]["Betweenness_centrality"]+"\",\""+dictProp[node]["closeness_centrality"]+"\",\""+dictProp[node]["clustering_coefficient"]+"\",\""+dictProp[node]["eccentricity"]+"\",\""+dictProp[node]["edge_count"]+"\",\""+dictProp[node]["In_degree"]+"\",\""+node+"\",\""+dictProp[node]["Neighborhood_conectivity"]+"\",\""+dictProp[node]["Out_degree"]+"\",\""+dictProp[node]["stress"]+"\"\n")

	outFile.close()
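
For comparison, several of the per-node metrics computed manually above have direct NetworkX equivalents on a simple, self-loop-free graph; a hedged sketch (illustrative graph, not the tab-separated input this script reads):

import networkx as nx

g = nx.karate_club_graph()
neighborhood_connectivity = nx.average_neighbor_degree(g)
clustering = nx.clustering(g)
eccentricity = nx.eccentricity(g)
betweenness = nx.betweenness_centrality(g)
print(neighborhood_connectivity[0], clustering[0], eccentricity[0], betweenness[0])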
Example #49
def extended_stats(G, connectivity=False, anc=False, ecc=False, bc=False, cc=False):
    """
    Calculate extended topological stats and metrics for a graph.

    Many of these algorithms have an inherently high time complexity. Global
    topological analysis of large complex networks is extremely time consuming
    and may exhaust computer memory. Consider using function arguments to not
    run metrics that require computation of a full matrix of paths if they
    will not be needed.

    Parameters
    ----------
    G : networkx multidigraph
    connectivity : bool
        if True, calculate node and edge connectivity
    anc : bool
        if True, calculate average node connectivity
    ecc : bool
        if True, calculate shortest paths, eccentricity, and topological metrics
        that use eccentricity
    bc : bool
        if True, calculate node betweenness centrality
    cc : bool
        if True, calculate node closeness centrality

    Returns
    -------
    stats : dict
        dictionary of network measures containing the following elements (some
        only calculated/returned optionally, based on passed parameters):

          - avg_neighbor_degree
          - avg_neighbor_degree_avg
          - avg_weighted_neighbor_degree
          - avg_weighted_neighbor_degree_avg
          - degree_centrality
          - degree_centrality_avg
          - clustering_coefficient
          - clustering_coefficient_avg
          - clustering_coefficient_weighted
          - clustering_coefficient_weighted_avg
          - pagerank
          - pagerank_max_node
          - pagerank_max
          - pagerank_min_node
          - pagerank_min
          - node_connectivity
          - node_connectivity_avg
          - edge_connectivity
          - eccentricity
          - diameter
          - radius
          - center
          - periphery
          - closeness_centrality
          - closeness_centrality_avg
          - betweenness_centrality
          - betweenness_centrality_avg

    """

    stats = {}
    full_start_time = time.time()

    # create a DiGraph from the MultiDiGraph, for those metrics that require it
    G_dir = nx.DiGraph(G)

    # create an undirected Graph from the MultiDiGraph, for those metrics that
    # require it
    G_undir = nx.Graph(G)

    # get the largest strongly connected component, for those metrics that
    # require strongly connected graphs
    G_strong = get_largest_component(G, strongly=True)

    # average degree of the neighborhood of each node, and average for the graph
    avg_neighbor_degree = nx.average_neighbor_degree(G)
    stats['avg_neighbor_degree'] = avg_neighbor_degree
    stats['avg_neighbor_degree_avg'] = sum(avg_neighbor_degree.values())/len(avg_neighbor_degree)

    # average weighted degree of the neighborhood of each node, and average for
    # the graph
    avg_weighted_neighbor_degree = nx.average_neighbor_degree(G, weight='length')
    stats['avg_weighted_neighbor_degree'] = avg_weighted_neighbor_degree
    stats['avg_weighted_neighbor_degree_avg'] = sum(avg_weighted_neighbor_degree.values())/len(avg_weighted_neighbor_degree)

    # degree centrality for a node is the fraction of nodes it is connected to
    degree_centrality = nx.degree_centrality(G)
    stats['degree_centrality'] = degree_centrality
    stats['degree_centrality_avg'] = sum(degree_centrality.values())/len(degree_centrality)

    # calculate clustering coefficient for the nodes
    stats['clustering_coefficient'] = nx.clustering(G_undir)

    # average clustering coefficient for the graph
    stats['clustering_coefficient_avg'] = nx.average_clustering(G_undir)

    # calculate weighted clustering coefficient for the nodes
    stats['clustering_coefficient_weighted'] = nx.clustering(G_undir, weight='length')

    # average clustering coefficient (weighted) for the graph
    stats['clustering_coefficient_weighted_avg'] = nx.average_clustering(G_undir, weight='length')

    # pagerank: a ranking of the nodes in the graph based on the structure of
    # the incoming links
    pagerank = nx.pagerank(G_dir, weight='length')
    stats['pagerank'] = pagerank

    # node with the highest page rank, and its value
    pagerank_max_node = max(pagerank, key=lambda x: pagerank[x])
    stats['pagerank_max_node'] = pagerank_max_node
    stats['pagerank_max'] = pagerank[pagerank_max_node]

    # node with the lowest page rank, and its value
    pagerank_min_node = min(pagerank, key=lambda x: pagerank[x])
    stats['pagerank_min_node'] = pagerank_min_node
    stats['pagerank_min'] = pagerank[pagerank_min_node]

    # if True, calculate node and edge connectivity
    if connectivity:
        start_time = time.time()

        # node connectivity is the minimum number of nodes that must be removed
        # to disconnect G or render it trivial
        stats['node_connectivity'] = nx.node_connectivity(G_strong)

        # edge connectivity is equal to the minimum number of edges that must be
        # removed to disconnect G or render it trivial
        stats['edge_connectivity'] = nx.edge_connectivity(G_strong)
        log('Calculated node and edge connectivity in {:,.2f} seconds'.format(time.time() - start_time))

    # if True, calculate average node connectivity
    if anc:
        # mean number of internally node-disjoint paths between each pair of
        # nodes in G, i.e., the expected number of nodes that must be removed to
        # disconnect a randomly selected pair of non-adjacent nodes
        start_time = time.time()
        stats['node_connectivity_avg'] = nx.average_node_connectivity(G)
        log('Calculated average node connectivity in {:,.2f} seconds'.format(time.time() - start_time))

    # if True, calculate shortest paths, eccentricity, and topological metrics
    # that use eccentricity
    if ecc:
        # precompute shortest paths between all nodes for eccentricity-based
        # stats
        start_time = time.time()
        sp = {source:dict(nx.single_source_dijkstra_path_length(G_strong, source, weight='length')) for source in G_strong.nodes()}

        log('Calculated shortest path lengths in {:,.2f} seconds'.format(time.time() - start_time))

        # eccentricity of a node v is the maximum distance from v to all other
        # nodes in G
        eccentricity = nx.eccentricity(G_strong, sp=sp)
        stats['eccentricity'] = eccentricity

        # diameter is the maximum eccentricity
        diameter = nx.diameter(G_strong, e=eccentricity)
        stats['diameter'] = diameter

        # radius is the minimum eccentricity
        radius = nx.radius(G_strong, e=eccentricity)
        stats['radius'] = radius

        # center is the set of nodes with eccentricity equal to radius
        center = nx.center(G_strong, e=eccentricity)
        stats['center'] = center

        # periphery is the set of nodes with eccentricity equal to the diameter
        periphery = nx.periphery(G_strong, e=eccentricity)
        stats['periphery'] = periphery

    # if True, calculate node closeness centrality
    if cc:
        # closeness centrality of a node is the reciprocal of the sum of the
        # shortest path distances from u to all other nodes
        start_time = time.time()
        closeness_centrality = nx.closeness_centrality(G, distance='length')
        stats['closeness_centrality'] = closeness_centrality
        stats['closeness_centrality_avg'] = sum(closeness_centrality.values())/len(closeness_centrality)
        log('Calculated closeness centrality in {:,.2f} seconds'.format(time.time() - start_time))

    # if True, calculate node betweenness centrality
    if bc:
        # betweenness centrality of a node is the sum of the fraction of
        # all-pairs shortest paths that pass through node
        start_time = time.time()
        betweenness_centrality = nx.betweenness_centrality(G, weight='length')
        stats['betweenness_centrality'] = betweenness_centrality
        stats['betweenness_centrality_avg'] = sum(betweenness_centrality.values())/len(betweenness_centrality)
        log('Calculated betweenness centrality in {:,.2f} seconds'.format(time.time() - start_time))

    log('Calculated extended stats in {:,.2f} seconds'.format(time.time()-full_start_time))
    return stats
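A minimal, self-contained sketch (my own toy graph, not an OSMnx street network) of how the avg_neighbor_degree / avg_neighbor_degree_avg pair above is assembled:

import networkx as nx

G = nx.path_graph(4)  # 0-1-2-3
avg_neighbor_degree = nx.average_neighbor_degree(G)  # {0: 2.0, 1: 1.5, 2: 1.5, 3: 2.0}
avg_neighbor_degree_avg = sum(avg_neighbor_degree.values()) / len(avg_neighbor_degree)
print(avg_neighbor_degree, avg_neighbor_degree_avg)  # graph-level average: 1.75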
Example #50
0
graph = nx.Graph(G)
graph.remove_edges_from(loops)
# get largest connected component
# unfortunately, the iterator over the components is not guaranteed to be sorted by size
components = sorted(nx.connected_components(graph), key=len, reverse=True)
lcc = graph.subgraph(components[0])
pos=nx.spring_layout(lcc)
d = nx.degree(lcc)
#nx.draw(lcc, nodelist=d.keys(), node_size=[v * 20 for v in d.values()])
#nx.draw_networkx_labels(lcc,pos=nx.spring_layout(lcc))
#plt.show()
# degree rank plot (log-log), with the largest connected component drawn as an inset

degree_sequence=sorted(nx.degree(G).values(),reverse=True)
dmax=max(degree_sequence)
plt.loglog(degree_sequence,'b-',marker='o')
plt.title("Degree rank plot")
plt.ylabel("degree")
plt.xlabel("rank")
plt.axes([0.45,0.45,0.45,0.45])
Gcc=sorted(nx.connected_component_subgraphs(G), key = len, reverse=True)[0]
pos=nx.spring_layout(Gcc)
plt.axis('off')
nx.draw_networkx_nodes(Gcc,pos,node_size=20)
nx.draw_networkx_edges(Gcc,pos,alpha=0.4)

plt.savefig("./USA/degree_histogram_usa.png")
plt.show()
print(nx.average_neighbor_degree(G, source='in', target='in'))
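A hedged aside, not from the original script: the saved file name suggests a degree histogram, but the plot above is a rank plot. If an actual histogram is wanted, nx.degree_histogram gives the per-degree node counts directly (random stand-in graph below).

import matplotlib.pyplot as plt
import networkx as nx

g = nx.gnm_random_graph(200, 600, seed=42)  # stand-in for the loaded graph
counts = nx.degree_histogram(g)             # counts[k] = number of nodes with degree k
plt.bar(range(len(counts)), counts)
plt.xlabel("degree")
plt.ylabel("number of nodes")
plt.show()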

Example #51
0
	def average_neighbor_degree_sum(self):
		if self.average_neighbor_degree_dict is None:
			self.average_neighbor_degree_dict = nx.average_neighbor_degree(self.graph)
		return self.average_neighbor_degree_dict[self.node_1] + self.average_neighbor_degree_dict[self.node_2]
def has_large_inout_nodes(g, cutoff=5):
    # True if any node's average neighbor degree exceeds the cutoff
    for n in nx.average_neighbor_degree(g).values():
        if n > cutoff:
            return True
    return False
Example #53
0
 def avg_degree(self):
     ''' Return the average degree of each node's neighbors.
     '''
     return nx.average_neighbor_degree(self._graph)
def neighdeg(network):
    return distri(nx.average_neighbor_degree(network).values(),'neighbor_degree')
Example #55
0
import sys
import time

import networkx as nx

start = time.time()  # reference point for the runtime messages below

G = nx.Graph()

f = open("network", "r")
for line in f:
   fields = line.strip().split()
   G.add_edge(int(fields[0]), int(fields[1]))
f.close()

sys.stderr.write("Data load! Runtime: %s\n" % (time.time() - start))

avg_clusterings = nx.clustering(G)

sys.stderr.write("Clusering calculated! Runtime: %s\n" % (time.time() - start))

neigh_degree = nx.average_neighbor_degree(G)

sys.stderr.write("AVG Neighbor degree calculated! Runtime: %s\n" % (time.time() - start))

bet_centr = nx.betweenness_centrality(G, k = 10000)

sys.stderr.write("Betweenness centrality calculated! Runtime: %s\n" % (time.time() - start))

clo_centr = nx.closeness_centrality(G)

sys.stderr.write("Closeness centrality calculated! Runtime: %s\n" % (time.time() - start))

f = open("node_stats_approx", 'w')
for i in G:
   f.write("%d::%s::%s::%s::%s\n" % (i, avg_clusterings[i], neigh_degree[i], bet_centr[i], clo_centr[i]))
f.close()
Example #56
0
# remove parallel edges and self-loops
graph = nx.Graph(G)
graph.remove_edges_from(loops)
# get largest connected component
# unfortunately, the iterator over the components is not guaranteed to be sorted by size
components = sorted(nx.connected_components(graph), key=len, reverse=True)
lcc = graph.subgraph(components[0])
pos = nx.spring_layout(lcc)
d = nx.degree(lcc)
# nx.draw(lcc, nodelist=d.keys(), node_size=[v * 20 for v in d.values()])
# nx.draw_networkx_labels(lcc,pos=nx.spring_layout(lcc))
# plt.show()
# degree rank plot (log-log), with the largest connected component drawn as an inset

degree_sequence = sorted(nx.degree(G).values(), reverse=True)
dmax = max(degree_sequence)
plt.loglog(degree_sequence, "b-", marker="o")
plt.title("Degree rank plot")
plt.ylabel("degree")
plt.xlabel("rank")
plt.axes([0.45, 0.45, 0.45, 0.45])
Gcc = sorted(nx.connected_component_subgraphs(G), key=len, reverse=True)[0]
pos = nx.spring_layout(Gcc)
plt.axis("off")
nx.draw_networkx_nodes(Gcc, pos, node_size=20)
nx.draw_networkx_edges(Gcc, pos, alpha=0.4)

plt.savefig("./USA/degree_histogram_usa.png")
plt.show()
print(nx.average_neighbor_degree(G, source="in", target="in"))
    def transversal_measures(self):

        transversal_measures    = []
        continuous              = False
        # - V(k) 
        # all
        degree_volumes  = []
        keys            = self.degree_distribution.keys()
        for key in keys:
            degree      = self.degree_distribution[key]
            volume      = self.volume_distribution[key]
            degree_volumes.append([degree,volume])
        V_k             = self.Stats.get_dependency(degree_volumes)
        #       - getting the aggregate dependency
        V_k_agg         = self.Stats.aggregate_distribution(V_k, self.aggregate_number)
        transversal_measures.append(V_k_agg)
        #       - adding the sd of the real distribution
        transversal_measures.append(V_k[2])
        # storing KS and Rsquared
        self.Stats.kolmogorov_smirnov(V_k[1],V_k_agg[1],continuous)
        self.Stats.r_square([x[0] for x in degree_volumes],[x[1] for x in degree_volumes])


        # in
        in_degree_volumes   = []
        keys                = self.in_degree_distribution.keys()
        for key in keys:
            in_degree       = self.in_degree_distribution[key]
            in_volume       = self.in_volume_distribution[key]
            in_degree_volumes.append([in_degree,in_volume])
        V_k_in              = self.Stats.get_dependency(in_degree_volumes)
        #       - getting the aggregate dependency
        V_k_in_agg         = self.Stats.aggregate_distribution(V_k_in, self.aggregate_number)
        transversal_measures.append(V_k_in_agg)        
        #       - adding the sd of the real distribution
        transversal_measures.append(V_k_in[2])
        # storing KS and Rsquared
        self.Stats.kolmogorov_smirnov(V_k_in[1],V_k_in_agg[1],continuous)
        self.Stats.r_square([x[0] for x in in_degree_volumes],[x[1] for x in in_degree_volumes])


        # out
        out_degree_volumes   = []
        keys                = self.out_degree_distribution.keys()
        for key in keys:
            out_degree       = self.out_degree_distribution[key]
            out_volume       = self.out_volume_distribution[key]
            out_degree_volumes.append([out_degree,out_volume])
        V_k_out              = self.Stats.get_dependency(out_degree_volumes)
        #       - getting the aggregate dependency
        V_k_out_agg         = self.Stats.aggregate_distribution(V_k_out, self.aggregate_number)
        transversal_measures.append(V_k_out_agg)        
        #       - adding the sd of the real distribution
        transversal_measures.append(V_k_out[2])
        # storing KS and Rsquared
        self.Stats.kolmogorov_smirnov(V_k_out[1],V_k_out_agg[1],continuous) 
        self.Stats.r_square([x[0] for x in out_degree_volumes],[x[1] for x in out_degree_volumes])


        # - C(k)
        G_undirected                    = self.G.to_undirected()
        undirected_degree_distribution  = G_undirected.degree()

        # unweighted cluster
        degree_unweighted_clusters  = []
        keys                        = undirected_degree_distribution.keys()
        for key in keys:
            degree                  = undirected_degree_distribution[key]
            unweighted_cluster      = self.unweighted_clustering_distribution[key]
            degree_unweighted_clusters.append([degree,unweighted_cluster])
        C_k_unweighted              = self.Stats.get_dependency(degree_unweighted_clusters)
        #       - getting the aggregate dependency
        C_k_unweighted_agg          = self.Stats.aggregate_distribution(C_k_unweighted, self.aggregate_number)
        transversal_measures.append(C_k_unweighted_agg)
        #       - adding the sd of the real distribution
        transversal_measures.append(C_k_unweighted[2])
        # storing KS and Rsquared
        self.Stats.kolmogorov_smirnov(C_k_unweighted[1],C_k_unweighted_agg[1],continuous)
        self.Stats.r_square([x[0] for x in degree_unweighted_clusters],[x[1] for x in degree_unweighted_clusters])


        # weighted cluster
        degree_weighted_clusters    = []
        # keys = self.degree_distribution.keys()
        for key in keys:
            degree                  = undirected_degree_distribution[key]
            weighted_cluster        = self.weighted_clustering_distribution[key]
            degree_weighted_clusters.append([degree,weighted_cluster])
        C_k_weighted                = self.Stats.get_dependency(degree_weighted_clusters)
        #       - getting the aggregate dependency
        C_k_weighted_agg          = self.Stats.aggregate_distribution(C_k_weighted, self.aggregate_number)
        transversal_measures.append(C_k_weighted_agg)
        #       - adding the sd of the real distribution
        transversal_measures.append(C_k_weighted[2])
        # storing KS and Rsquared
        self.Stats.kolmogorov_smirnov(C_k_weighted[1],C_k_weighted_agg[1],continuous)
        self.Stats.r_square([x[0] for x in degree_weighted_clusters],[x[1] for x in degree_weighted_clusters])      

        # - Vij
        # average weight of links for Ki*Kj
        edges_volume_degree = []
        for edge in self.G.edges(data = True):
            node1_degree            = self.out_degree_distribution[edge[0]]
            node2_degree            = self.in_degree_distribution[edge[1]]
            weight                  = edge[2][self.weight_id]
            edges_volume_degree.append([node1_degree*node2_degree, weight])
        volume_end_point_degree     = self.Stats.get_dependency(edges_volume_degree)
        transversal_measures.append(volume_end_point_degree)

        # - Knn
        # unweighted
        # undirected
        average_neighbor_degrees        = nx.average_neighbor_degree(self.G)
        average_neighbor_degree_k       = []
        for key in keys:
            degree                      = undirected_degree_distribution[key]
            average_neighbor_degree     = average_neighbor_degrees[key]
            average_neighbor_degree_k.append([degree,average_neighbor_degree]) 
        average_neighbor_degree_k_dep   = self.Stats.get_dependency(average_neighbor_degree_k)
        # adding to the general values
        [average_neighbor_degree_mean, average_neighbor_degree_sd] = self.Stats.get_mean_sd(average_neighbor_degrees)
        self.features[0].append(average_neighbor_degree_mean)
        self.features[0].append(average_neighbor_degree_sd)
        #       - getting the aggregate dependency
        average_neighbor_degree_k_agg   = self.Stats.aggregate_distribution(average_neighbor_degree_k_dep,
                                                               self.aggregate_number)
        transversal_measures.append(average_neighbor_degree_k_agg)
        #       - adding the sd of the real distribution
        transversal_measures.append(average_neighbor_degree_k_dep[2])
        #       - computing the KS and R square test
        self.Stats.kolmogorov_smirnov(average_neighbor_degree_k_dep[1], average_neighbor_degree_k_agg[1], continuous)
        self.Stats.r_square([x[0] for x in average_neighbor_degree_k],[x[1] for x in average_neighbor_degree_k])


        # weighted
        # undirected
        average_neighbor_degrees_weighted       = nx.average_neighbor_degree(self.G, weight = self.weight_id)
        average_neighbor_degree_weighted_k      = []
        for key in keys:
            degree                              = undirected_degree_distribution[key]
            average_neighbor_degree_weighted    = average_neighbor_degrees_weighted[key]
            average_neighbor_degree_weighted_k.append([degree,average_neighbor_degree_weighted]) 
        average_neighbor_degree_weighted_k_dep  = self.Stats.get_dependency(average_neighbor_degree_weighted_k)
        # adding to the general values
        [average_neighbor_degree_weighted_mean, average_neighbor_degree_weighted_sd] = self.Stats.get_mean_sd(average_neighbor_degrees_weighted)
        self.features[0].append(average_neighbor_degree_weighted_mean)
        self.features[0].append(average_neighbor_degree_weighted_sd)  
        #       - getting the aggregate dependency
        average_neighbor_degree_weighted_k_agg   = self.Stats.aggregate_distribution(average_neighbor_degree_weighted_k_dep,
                                                               self.aggregate_number)
        transversal_measures.append(average_neighbor_degree_weighted_k_agg)
        #       - adding the sd of the real distribution
        transversal_measures.append(average_neighbor_degree_weighted_k_dep[2])
        #       - computing the KS and R square test
        self.Stats.kolmogorov_smirnov(average_neighbor_degree_weighted_k_dep[1], average_neighbor_degree_weighted_k_agg[1], continuous)
        self.Stats.r_square([x[0] for x in average_neighbor_degree_weighted_k],[x[1] for x in average_neighbor_degree_weighted_k])

        self.features.append(transversal_measures)
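A minimal self-contained sketch of the K_nn(k) dependency computed above, assuming only plain networkx (no self.Stats helpers, random stand-in graph):

from collections import defaultdict

import networkx as nx

g = nx.barabasi_albert_graph(200, 3, seed=1)
knn = nx.average_neighbor_degree(g)  # per-node average neighbor degree
by_degree = defaultdict(list)
for node, k in g.degree():
    by_degree[k].append(knn[node])
# average neighbor degree as a function of node degree, K_nn(k)
knn_of_k = {k: sum(vals) / len(vals) for k, vals in sorted(by_degree.items())}
print(knn_of_k)

For the unweighted, undirected case this should match what nx.average_degree_connectivity(g) returns.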
Example #58
0
 def test_degree_k4_nodes(self):
     G=nx.complete_graph(4)
     answer={1:3.0,2:3.0}
     nd = nx.average_neighbor_degree(G,nodes=[1,2])
     assert_equal(nd,answer)
Example #59
0
import csv

import networkx as nx
from networkx.algorithms import bipartite

G=nx.DiGraph()
 
filename = "TF_analysis.csv"

csvfile = open(filename) #1~300

# print G.node


for row in csv.reader(csvfile):
   G.add_edges_from([(row[0],row[1])],weight=row[2])


print "average_neighbor_degree"
print nx.average_neighbor_degree(G) 

print "degree_assortativity_coefficient"
print nx.degree_assortativity_coefficient(G)

print "degree_pearson_correlation_coefficient"
print nx.degree_pearson_correlation_coefficient(G)  
#print nx.k_nearest_neighbors(G)


print "bipartite.closeness_centrality"
print bipartite.closeness_centrality(G,G.node)

print "degree_centrality"
print nx.degree_centrality(G)
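A hedged aside, not from the example above: degree_assortativity_coefficient and degree_pearson_correlation_coefficient measure the same quantity, the Pearson correlation of the degrees at either end of an edge, so on an unweighted test graph they should agree to numerical precision (the *_pearson_* variant uses scipy).

import networkx as nx

g = nx.barabasi_albert_graph(100, 2, seed=0)
r1 = nx.degree_assortativity_coefficient(g)
r2 = nx.degree_pearson_correlation_coefficient(g)  # uses scipy.stats.pearsonr
print(r1, r2)  # the two values should match to within numerical precision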
Example #60
0
m = 8
n = 137
  
# n-by-m table of zeros (note: n*[m*[0]] repeats the same inner row object)
np.Array1 = n*[m*[0]]

for row in csv.reader(csvfile):
   G.add_edges_from([(row[0],row[1])],weight=row[2])
   Gene_name.append(row[0])

#queue = deque([row[0]])
#print G.node
#print Gene_name

#Model of comparing networks
print "average_neighbor_degree"
dict = nx.average_neighbor_degree(G) 

# print dict

# list used for anomaly detection
put_aside = []

for x in Gene_name:
   y = str(x)
   put_aside.append(dict[y])

list1 = dict.keys()
list2 = dict.values()


print nx.triangles(G,0)