def solve(G):
    domset = min_weighted_dominating_set(G)
    ranklist = [i for i in domset]
    ranklist.sort(reverse=True, key=lambda x: len(G[x]))
    graph3 = nx.Graph()
    special = chkspecialcase(G)
    if special:
        graph3.add_node(special)
        return graph3
    for j in ranklist:
        if j == len(ranklist) - 1:
            return graph3
        k = j + 1
        while k < len(ranklist):
            if nx.has_path(G, j, k):
                # graph3.add_edges_from(astar_path(G, j, j + 1, G.edges()['weights']))
                # graph3.add_edges_from(nx.astar_path(G, j, j + 1))
                a = nx.astar_path(G, j, k)
                for i in range(len(a) - 1):
                    graph3.add_weighted_edges_from([
                        (a[i], a[i + 1], G[a[i]][a[i + 1]]['weight'])
                    ])
            k += 1
    return graph3
def helper2(G):
    T = nx.minimum_spanning_tree(G)
    curr_lowest = average_pairwise_distance(T)
    curr_lowest_tree = T
    S = min_weighted_dominating_set(T)
    newG = nx.subgraph(T, S)
    ncc = nx.number_connected_components(newG)
    ccs = list(connected_components(newG))
    for i in range(len(ccs) - 1):
        curr_node = ccs[i].pop()
        ccs[i].add(curr_node)
        next_node = ccs[i + 1].pop()
        ccs[i + 1].add(next_node)
        path = nx.dijkstra_path(G, curr_node, next_node)
        for n in path:
            if n not in list(newG.nodes):
                S.add(n)
        newG = nx.subgraph(G, S)
    newT = nx.minimum_spanning_tree(newG)
    if is_valid_network(G, newT):
        apd = average_pairwise_distance(newT)
        if apd < curr_lowest:
            curr_lowest = apd
            curr_lowest_tree = newT
    return curr_lowest_tree
def solve(G): """ Directly calls MST algorithm to link together the nodes of the dominatingSet. Only includes edges that link together two dominating set vertices. Args: G: networkx.Graph Returns: T: networkx.Graph """ temp = nx.Graph() dominatingSet = min_weighted_dominating_set(G, weight="weight") temp.add_nodes_from(dominatingSet) for node in dominatingSet: for node2 in dominatingSet: if G.has_edge(node, node2): temp.add_edge(node, node2) temp[node][node2]['weight'] = G.get_edge_data(node, node2)['weight'] # Get MST of dominating set edges = list( nx.minimum_spanning_edges(temp, algorithm='kruskal', weight='weight', keys=True, data=True, ignore_nan=False)) T = nx.Graph() T.add_nodes_from(dominatingSet) T.add_edges_from(edges) return T
def dominating_set(num_seeds, G):
    max_set = min_weighted_dominating_set(G)
    seeds = random.sample(max_set, min(num_seeds, len(max_set)))
    if len(seeds) < num_seeds:
        seeds.extend(random.sample(set(G.nodes()) - set(seeds), num_seeds - len(seeds)))
        # seeds = list(set(seeds))
    return seeds
def get_dominating_set(G, x_vals, y_vals):
    vertices_1 = min_weighted_dominating_set(G)
    vertices_2 = dominating_set(G)
    # print("no of chosen vertices with min_weighted_dominating_set are ", len(vertices_1))
    #       # "and they are ", vertices_1)
    # print("no of chosen vertices with dominating_set are ", len(vertices_2))
    #       # "and they are ", vertices_2)
    # print("weight = ", G.get_edge_data(2,60))
    # compare_graph(vertices_1, vertices_2, x_vals, y_vals)
    return len(vertices_2)
def mst_aprox(self, G, start):
    self.dominatingset = list(approximation.min_weighted_dominating_set(G, 'weight'))
    self.bell = dict(algo.shortest_paths.all_pairs_bellman_ford_path(G))
    self.intactset = self.dominatingset[:]
    self.current = self.start
    self.queue = {}
    self.copydomi = self.dominatingset[:]
    self.shortest = 4000000000
    cal = 0
    while len(self.dominatingset) != 0:
        for i in self.dominatingset:
            cal += self.calculateweight(self.bell[self.current][i])
            self.queue[i] = [cal, self.bell[self.current][i]]
        self.goal = min(self.queue, key=self.queue.get)
        self.walk += self.queue[self.goal][1][1:]
        self.queue = {}
        self.current = self.goal
        self.dominatingset.remove(self.goal)
    self.walk += self.dijkreturn(self.current)[1:]
    minim = cal
    self.dominatingset = self.copydomi[:]
    self.permus = []
    if self.start in self.dominatingset:
        self.copydomi.remove(self.start)
        try:
            iterate = itertools.permutations(self.copydomi)
            for i in range(self.howmanyperms):
                self.permus += [next(iterate)]
        except:
            iterate = itertools.permutations(self.copydomi)
            self.permus = list(iterate)
    else:
        try:
            iterate = itertools.permutations(self.copydomi)
            for i in range(self.howmanyperms):
                self.permus += [next(iterate)]
        except:
            iterate = itertools.permutations(self.copydomi)
            self.permus = list(iterate)
    for i in self.permus:
        self.current = self.start
        cal = 0
        tempwalk = []
        for k in i:
            cal += self.calculateweight(self.bell[self.current][k])
            tempwalk += self.bell[self.current][k][1:]
            self.current = k
        cal += self.calculateweight(self.bell[self.current][self.start])
        tempwalk += self.bell[self.current][self.start][1:]
        if minim > cal:
            self.walk = tempwalk[:]
            minim = cal
def getDominatingSet(G):
    G2 = nx.Graph(G)
    dominating_set = approximation.min_weighted_dominating_set(G2)
    for dom in dominating_set:
        G.nodes[dom]["shape"] = "box"
    return dominating_set
def greedy_mds(G):
    mds = min_weighted_dominating_set(G)
    final = G.copy()
    for node in G.copy():
        if node not in mds:
            final.remove_node(node)
    if not nx.is_connected(final):
        return mwrc_approx(G)
    return mwrc_approx(final)
def dominating_set(num_seeds, G):
    max_set = min_weighted_dominating_set(G)
    print("GOT CLIQUE")
    seeds = random.sample(max_set, min(num_seeds, len(max_set)))
    print("GOT FIRST SAMPLE")
    if len(seeds) < num_seeds:
        seeds.extend(
            random.sample(set(G.nodes()) - set(seeds), num_seeds - len(seeds)))
        # seeds = list(set(seeds))
    print("GOT SECOND SAMPLE")
    return seeds
def test_star_graph(self):
    """Tests that an approximate dominating set for the star graph,
    even when the center node does not have the smallest integer label,
    gives just the center node.

    For more information, see #1527.

    """
    # Create a star graph in which the center node has the highest
    # label instead of the lowest.
    G = nx.star_graph(10)
    G = nx.relabel_nodes(G, {0: 9, 9: 0})
    eq_(min_weighted_dominating_set(G), {9})
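# Stand-alone version of the star-graph check above (added for illustration;
# assumes only networkx and replaces the nose-style eq_ with a plain assert).
import networkx as nx
from networkx.algorithms.approximation import min_weighted_dominating_set

G = nx.star_graph(10)                  # center is node 0, leaves are 1..10
G = nx.relabel_nodes(G, {0: 9, 9: 0})  # move the center to label 9
assert min_weighted_dominating_set(G) == {9}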
def generate_random_new_way(self):
    og = min_weighted_dominating_set(self.graph, "weight")
    start_node = og.pop()
    holder = set()
    while og:
        curr_node = og.pop()
        holder.update(dijkstra_path(self.graph, start_node, curr_node))
    if holder and is_valid_network(self.graph, self.graph.subgraph(holder)):
        self.network = nx.minimum_spanning_tree(
            self.graph.subgraph(holder))
    else:
        self.generate_random_old_way(high_degree=True)
def test_min_weighted_dominating_set(self):
    graph = nx.Graph()
    graph.add_edge(1, 2)
    graph.add_edge(1, 5)
    graph.add_edge(2, 3)
    graph.add_edge(2, 5)
    graph.add_edge(3, 4)
    graph.add_edge(3, 6)
    graph.add_edge(5, 6)
    vertices = set([1, 2, 3, 4, 5, 6])
    # due to ties, this might be hard to test tight bounds
    dom_set = min_weighted_dominating_set(graph)
    for vertex in vertices - dom_set:
        neighbors = set(graph.neighbors(vertex))
        ok_(len(neighbors & dom_set) > 0, "Non dominating set found!")
def test_min_weighted_dominating_set(self):
    graph = nx.Graph()
    graph.add_edge(1, 2)
    graph.add_edge(1, 5)
    graph.add_edge(2, 3)
    graph.add_edge(2, 5)
    graph.add_edge(3, 4)
    graph.add_edge(3, 6)
    graph.add_edge(5, 6)
    vertices = set([1, 2, 3, 4, 5, 6])
    # due to ties, this might be hard to test tight bounds
    dom_set = apxa.min_weighted_dominating_set(graph)
    for vertex in vertices - dom_set:
        neighbors = set(graph.neighbors(vertex))
        ok_(len(neighbors & dom_set) > 0, "Non dominating set found!")
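# A shorter way to express the property these two tests verify (added for
# illustration; assumes only networkx): nx.is_dominating_set checks that every
# node outside the candidate set has at least one neighbor inside it.
import networkx as nx
from networkx.algorithms.approximation import min_weighted_dominating_set

graph = nx.Graph([(1, 2), (1, 5), (2, 3), (2, 5), (3, 4), (3, 6), (5, 6)])
dom_set = min_weighted_dominating_set(graph)
assert nx.is_dominating_set(graph, dom_set)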
def strategy_4(input_filename, num_seeds):
    x = time.clock()
    G = nx.Graph()
    degrees = {}
    Gconnected = nx.Graph()
    #print("Graph loaded")
    for line in graph.items():
        node_id = int(line[0])
        neighbors = line[1]
        degrees[node_id] = len(neighbors)
        for neighbor in neighbors:
            G.add_edge(node_id, int(neighbor))
            Gconnected.add_edge(node_id, int(neighbor))
        if len(neighbors) == 0:
            G.add_node(node_id)
    dominating = apxa.min_weighted_dominating_set(G)
    #dominating = max(nx.connected_components(Gconnected), key= lambda x:len(x))
    complement = set(G.nodes()) - dominating
    #print(nx.number_of_nodes(G))
    #dominating = set(random.sample(G.nodes(), .9*nx.number_of_nodes(G)))
    #dominating = set(G.nodes())
    print("Dominating done")
    ha = time.clock()
    centralities = nx.algorithms.centrality.subgraph_centrality(G)
    print("subgraph done: " + str(time.clock() - ha))
    whoa = time.clock()
    centralities2 = nx.algorithms.centrality.betweenness_centrality_subset(
        G, list(dominating), list(complement))
    #centralities2 = nx.algorithms.centrality.closeness_centrality(G)
    #centralities2 = nx.algorithms.centrality.degree_centrality(G)
    print("Degree done:" + str(time.clock() - whoa))
    #centralities3 = nx.algorithms.centrality.harmonic_centrality(G, nbunch=dominating)
    ay = time.clock()
    centralities3 = nx.algorithms.centrality.eigenvector_centrality(
        G, max_iter=150, tol=1.0 * 10**-6)
    print("Harmonic done: " + str(time.clock() - ay))
    welp = time.clock()
    clusters = {}
    for node in dominating:
        c = nx.algorithms.cluster.clustering(G, node)
        clusters[node] = c
    print("Cluster done: " + str(time.clock() - welp))
    lol = time.clock()
    influence = {}
    for node in dominating:
        influence[node] = calc_centrality2(G, node, centralities, centralities2,
                                           centralities3, clusters, dominating)
    print("Neighbors: " + str(time.clock() - lol))
    # Normalize degrees for weighting with other heuristics
    # centrality = np.array(centrality) / np.linalg.norm(centrality)
    sorted_inf = sorted(influence.keys(), key=lambda x: influence[x])
    result = sorted_inf[-int(num_seeds):]
    # degrees = nx.algorithms.centrality.degree_centrality(G)
    # max_degrees = sorted(degrees.keys(), key=lambda x: degrees[x])[-int(num_seeds)+a:]
    # count = 1
    # done = False
    # while(not done):
    #     changed = False
    #     for node in result:
    #         if node in max_degrees:
    #             result.remove(node)
    #             result.append(sorted_inf[-int(num_seeds)-count])
    #             count += 1
    #             changed = True
    #     if not changed:
    #         done = True
    print(time.clock() - x)
    # dominating = list(dominating)
    # max_degrees.extend(result[-a:])
    return [str(i) for i in result]
def solve(G): """ Idea for solve method below: is to basically find shortest paths (via Dijkstras) between each pair of vertices within the dominating set. Put all the vertices within the dominating set into a new graph G_prime, and add edges between each pair of vertices. Introduce new vertices into the graph as necessary to ensure connectivity. In the end, return the MST of G_prime. Also calls extra-vertex-optimization in the end. Args: G: networkx.Graph Returns: T: networkx.Graph """ # TODO: your code here! dominatingSet = min_weighted_dominating_set(G, weight="weight") # The variable defined below, apsp, stands for all pairs shortest paths from calling NetworkX built-in Dijkstra's algorithm. apsp = dict( nx.algorithms.shortest_paths.weighted.all_pairs_dijkstra( G, weight="weight")) # G_prime, the new graph below, shall consist of all vertices within the dominating set along with their shortest path edge weights in # between, bringing in new vertices as necessary. G_prime = nx.Graph() G.add_nodes_from(dominatingSet) # Vertices to add contains new vertices which must be added into graph G prime in order to ensure connectivity of nodes from min # dominating set. extra_vertices = set() for node in dominatingSet: for node2 in dominatingSet: shortest_path = apsp[node][1][node2] # First, identify new vertices to be thrown into G prime. for vertex in shortest_path: if vertex not in dominatingSet: G_prime.add_node( vertex ) # I do believe from my Internet search that duplicate nodes has no effect extra_vertices.add( vertex ) # Keep track of the list of all vertices within the dominating set # Next, identify new edges to be thrown into G prime. Adding edges more than once has no effect. for i in range(len(shortest_path) - 1): origin_vertex = shortest_path[i] terminus_vertex = shortest_path[i + 1] w = G.get_edge_data(origin_vertex, terminus_vertex)['weight'] G_prime.add_edge(origin_vertex, terminus_vertex, weight=w) final_edges = list( nx.minimum_spanning_edges(G_prime, algorithm='kruskal', weight='weight', keys=True, data=True, ignore_nan=False)) T = nx.Graph() T.add_nodes_from(dominatingSet) T.add_nodes_from(extra_vertices) T.add_edges_from(final_edges) current_average = average_pairwise_distance(T) last_average = 4000 print(current_average) # Until adding more edges doesn't improve the average pairwise cost while current_average < last_average: last_average = current_average # For every node in T for node in nx.dfs_preorder_nodes(T, source=list(T.nodes)[0]): neighbors = nx.algorithms.traversal.breadth_first_search.bfs_tree( G, node, reverse=False, depth_limit=1) # Get one of its neighbors NOT in T for node2 in neighbors: # and add the edge between that vertex and its neighbor # if it decreases the average pairwise cost. if node2 not in T and G.get_edge_data(node, node2)\ and G[node][node2]['weight'] < current_average: T.add_node(node2) T.add_edge(node, node2, weight=G.get_edge_data(node, node2)['weight']) new_average = average_pairwise_distance(T) if new_average > current_average: T.remove_node(node2) #T.remove_edge(node, node2) else: current_average = new_average print("Adding an edge between", node, "and", node2, "yields average", new_average) print("Dominating vertices:", [node for node in T]) return T
def compute_summaries(G): """ Compute network features, computational times and their nature. Evaluate 54 summary statistics of a network G, plus 4 noise variables, store the computational time to evaluate each summary statistic, and keep track of their nature (discrete or not). Args: G (networkx.classes.graph.Graph): an undirected networkx graph. Returns: resDicts (tuple): a tuple containing the elements: - dictSums (dict): a dictionary with the name of the summaries as keys and the summary statistic values as values; - dictTimes (dict): a dictionary with the name of the summaries as keys and the time to compute each one as values; - dictIsDist (dict): a dictionary indicating if the summary is discrete (True) or not (False). """ dictSums = dict() # Will store the summary statistic values dictTimes = dict() # Will store the evaluation times dictIsDisc = dict() # Will store the summary statistic nature # Extract the largest connected component Gcc = sorted(nx.connected_components(G), key=len, reverse=True) G_lcc = G.subgraph(Gcc[0]) # Number of edges start = time.time() dictSums["num_edges"] = G.number_of_edges() dictTimes["num_edges"] = time.time() - start dictIsDisc["num_edges"] = True # Number of connected components start = time.time() dictSums["num_of_CC"] = nx.number_connected_components(G) dictTimes["num_of_CC"] = time.time() - start dictIsDisc["num_of_CC"] = True # Number of nodes in the largest connected component start = time.time() dictSums["num_nodes_LCC"] = nx.number_of_nodes(G_lcc) dictTimes["num_nodes_LCC"] = time.time() - start dictIsDisc["num_nodes_LCC"] = True # Number of edges in the largest connected component start = time.time() dictSums["num_edges_LCC"] = G_lcc.number_of_edges() dictTimes["num_edges_LCC"] = time.time() - start dictIsDisc["num_edges_LCC"] = True # Diameter of the largest connected component start = time.time() dictSums["diameter_LCC"] = nx.diameter(G_lcc) dictTimes["diameter_LCC"] = time.time() - start dictIsDisc["diameter_LCC"] = True # Average geodesic distance (shortest path length in the LCC) start = time.time() dictSums["avg_geodesic_dist_LCC"] = nx.average_shortest_path_length(G_lcc) dictTimes["avg_geodesic_dist_LCC"] = time.time() - start dictIsDisc["avg_geodesic_dist_LCC"] = False # Average degree of the neighborhood of each node start = time.time() dictSums["avg_deg_connectivity"] = np.mean( list(nx.average_degree_connectivity(G).values())) dictTimes["avg_deg_connectivity"] = time.time() - start dictIsDisc["avg_deg_connectivity"] = False # Average degree of the neighbors of each node in the LCC start = time.time() dictSums["avg_deg_connectivity_LCC"] = np.mean( list(nx.average_degree_connectivity(G_lcc).values())) dictTimes["avg_deg_connectivity_LCC"] = time.time() - start dictIsDisc["avg_deg_connectivity_LCC"] = False # Recover the degree distribution start_degree_extract = time.time() degree_vals = list(dict(G.degree()).values()) degree_extract_time = time.time() - start_degree_extract # Entropy of the degree distribution start = time.time() dictSums["degree_entropy"] = ss.entropy(degree_vals) dictTimes["degree_entropy"] = time.time() - start + degree_extract_time dictIsDisc["degree_entropy"] = False # Maximum degree start = time.time() dictSums["degree_max"] = max(degree_vals) dictTimes["degree_max"] = time.time() - start + degree_extract_time dictIsDisc["degree_max"] = True # Average degree start = time.time() dictSums["degree_mean"] = np.mean(degree_vals) dictTimes["degree_mean"] = time.time() - start + degree_extract_time 
dictIsDisc["degree_mean"] = False # Median degree start = time.time() dictSums["degree_median"] = np.median(degree_vals) dictTimes["degree_median"] = time.time() - start + degree_extract_time dictIsDisc["degree_median"] = False # Standard deviation of the degree distribution start = time.time() dictSums["degree_std"] = np.std(degree_vals) dictTimes["degree_std"] = time.time() - start + degree_extract_time dictIsDisc["degree_std"] = False # Quantile 25% start = time.time() dictSums["degree_q025"] = np.quantile(degree_vals, 0.25) dictTimes["degree_q025"] = time.time() - start + degree_extract_time dictIsDisc["degree_q025"] = False # Quantile 75% start = time.time() dictSums["degree_q075"] = np.quantile(degree_vals, 0.75) dictTimes["degree_q075"] = time.time() - start + degree_extract_time dictIsDisc["degree_q075"] = False # Average geodesic distance start = time.time() dictSums["avg_shortest_path_length_LCC"] = nx.average_shortest_path_length( G_lcc) dictTimes["avg_shortest_path_length_LCC"] = time.time() - start dictIsDisc["avg_shortest_path_length_LCC"] = False # Average global efficiency: # The efficiency of a pair of nodes in a graph is the multiplicative # inverse of the shortest path distance between the nodes. # The average global efficiency of a graph is the average efficiency of # all pairs of nodes. start = time.time() dictSums["avg_global_efficiency"] = nx.global_efficiency(G) dictTimes["avg_global_efficiency"] = time.time() - start dictIsDisc["avg_global_efficiency"] = False # Harmonic mean which is 1/avg_global_efficiency start = time.time() dictSums["harmonic_mean"] = nx.global_efficiency(G) dictTimes["harmonic_mean"] = time.time() - start dictIsDisc["harmonic_mean"] = False # Average local efficiency # The local efficiency of a node in the graph is the average global # efficiency of the subgraph induced by the neighbors of the node. # The average local efficiency is the average of the # local efficiencies of each node. start = time.time() dictSums["avg_local_efficiency_LCC"] = nx.local_efficiency(G_lcc) dictTimes["avg_local_efficiency_LCC"] = time.time() - start dictIsDisc["avg_local_efficiency_LCC"] = False # Node connectivity # The node connectivity is equal to the minimum number of nodes that # must be removed to disconnect G or render it trivial. # Only on the largest connected component here. start = time.time() dictSums["node_connectivity_LCC"] = nx.node_connectivity(G_lcc) dictTimes["node_connectivity_LCC"] = time.time() - start dictIsDisc["node_connectivity_LCC"] = True # Edge connectivity # The edge connectivity is equal to the minimum number of edges that # must be removed to disconnect G or render it trivial. # Only on the largest connected component here. 
    start = time.time()
    dictSums["edge_connectivity_LCC"] = nx.edge_connectivity(G_lcc)
    dictTimes["edge_connectivity_LCC"] = time.time() - start
    dictIsDisc["edge_connectivity_LCC"] = True

    # Graph transitivity
    # 3 times the number of triangles divided by the number of triads
    start = time.time()
    dictSums["transitivity"] = nx.transitivity(G)
    dictTimes["transitivity"] = time.time() - start
    dictIsDisc["transitivity"] = False

    # Number of triangles
    start = time.time()
    dictSums["num_triangles"] = np.sum(list(nx.triangles(G).values())) / 3
    dictTimes["num_triangles"] = time.time() - start
    dictIsDisc["num_triangles"] = True

    # Estimate of the average clustering coefficient of G:
    # Average local clustering coefficient, with local clustering coefficient
    # defined as C_i = (nbr of pairs of neighbors of i that are connected)/(nbr of pairs of neighbors of i)
    start = time.time()
    dictSums["avg_clustering_coef"] = nx.average_clustering(G)
    dictTimes["avg_clustering_coef"] = time.time() - start
    dictIsDisc["avg_clustering_coef"] = False

    # Square clustering (averaged over nodes):
    # the fraction of possible squares that exist at the node.
    # We average it over nodes
    start = time.time()
    dictSums["square_clustering_mean"] = np.mean(
        list(nx.square_clustering(G).values()))
    dictTimes["square_clustering_mean"] = time.time() - start
    dictIsDisc["square_clustering_mean"] = False

    # We compute the median
    start = time.time()
    dictSums["square_clustering_median"] = np.median(
        list(nx.square_clustering(G).values()))
    dictTimes["square_clustering_median"] = time.time() - start
    dictIsDisc["square_clustering_median"] = False

    # We compute the standard deviation
    start = time.time()
    dictSums["square_clustering_std"] = np.std(
        list(nx.square_clustering(G).values()))
    dictTimes["square_clustering_std"] = time.time() - start
    dictIsDisc["square_clustering_std"] = False

    # Number of 2-cores
    start = time.time()
    dictSums["num_2cores"] = len(nx.k_core(G, k=2))
    dictTimes["num_2cores"] = time.time() - start
    dictIsDisc["num_2cores"] = True

    # Number of 3-cores
    start = time.time()
    dictSums["num_3cores"] = len(nx.k_core(G, k=3))
    dictTimes["num_3cores"] = time.time() - start
    dictIsDisc["num_3cores"] = True

    # Number of 4-cores
    start = time.time()
    dictSums["num_4cores"] = len(nx.k_core(G, k=4))
    dictTimes["num_4cores"] = time.time() - start
    dictIsDisc["num_4cores"] = True

    # Number of 5-cores
    start = time.time()
    dictSums["num_5cores"] = len(nx.k_core(G, k=5))
    dictTimes["num_5cores"] = time.time() - start
    dictIsDisc["num_5cores"] = True

    # Number of 6-cores
    start = time.time()
    dictSums["num_6cores"] = len(nx.k_core(G, k=6))
    dictTimes["num_6cores"] = time.time() - start
    dictIsDisc["num_6cores"] = True

    # Number of k-shells
    # The k-shell is the subgraph induced by nodes with core number k.
    # That is, nodes in the k-core that are not in the (k+1)-core

    # Number of 2-shells
    start = time.time()
    dictSums["num_2shells"] = len(nx.k_shell(G, 2))
    dictTimes["num_2shells"] = time.time() - start
    dictIsDisc["num_2shells"] = True

    # Number of 3-shells
    start = time.time()
    dictSums["num_3shells"] = len(nx.k_shell(G, 3))
    dictTimes["num_3shells"] = time.time() - start
    dictIsDisc["num_3shells"] = True

    # Number of 4-shells
    start = time.time()
    dictSums["num_4shells"] = len(nx.k_shell(G, 4))
    dictTimes["num_4shells"] = time.time() - start
    dictIsDisc["num_4shells"] = True

    # Number of 5-shells
    start = time.time()
    dictSums["num_5shells"] = len(nx.k_shell(G, 5))
    dictTimes["num_5shells"] = time.time() - start
    dictIsDisc["num_5shells"] = True

    # Number of 6-shells
    start = time.time()
    dictSums["num_6shells"] = len(nx.k_shell(G, 6))
    dictTimes["num_6shells"] = time.time() - start
    dictIsDisc["num_6shells"] = True

    start = time.time()
    listOfCliques = list(nx.enumerate_all_cliques(G))
    enum_all_cliques_time = time.time() - start

    # Number of 4-cliques
    start = time.time()
    n4Clique = 0
    for li in listOfCliques:
        if len(li) == 4:
            n4Clique += 1
    dictSums["num_4cliques"] = n4Clique
    dictTimes["num_4cliques"] = time.time() - start + enum_all_cliques_time
    dictIsDisc["num_4cliques"] = True

    # Number of 5-cliques
    start = time.time()
    n5Clique = 0
    for li in listOfCliques:
        if len(li) == 5:
            n5Clique += 1
    dictSums["num_5cliques"] = n5Clique
    dictTimes["num_5cliques"] = time.time() - start + enum_all_cliques_time
    dictIsDisc["num_5cliques"] = True

    # Maximal size of a clique in the graph
    start = time.time()
    dictSums["max_clique_size"] = len(approximation.clique.max_clique(G))
    dictTimes["max_clique_size"] = time.time() - start
    dictIsDisc["max_clique_size"] = True

    # Approximated size of a large clique in the graph
    start = time.time()
    dictSums["large_clique_size"] = approximation.large_clique_size(G)
    dictTimes["large_clique_size"] = time.time() - start
    dictIsDisc["large_clique_size"] = True

    # Number of shortest paths of length k
    start = time.time()
    listOfPLength = list(nx.shortest_path_length(G))
    path_length_time = time.time() - start

    # when k = 3
    start = time.time()
    n3Paths = 0
    for node in G.nodes():
        tmp = list(listOfPLength[node][1].values())
        n3Paths += tmp.count(3)
    dictSums["num_shortest_3paths"] = n3Paths / 2
    dictTimes["num_shortest_3paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_3paths"] = True

    # when k = 4
    start = time.time()
    n4Paths = 0
    for node in G.nodes():
        tmp = list(listOfPLength[node][1].values())
        n4Paths += tmp.count(4)
    dictSums["num_shortest_4paths"] = n4Paths / 2
    dictTimes["num_shortest_4paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_4paths"] = True

    # when k = 5
    start = time.time()
    n5Paths = 0
    for node in G.nodes():
        tmp = list(listOfPLength[node][1].values())
        n5Paths += tmp.count(5)
    dictSums["num_shortest_5paths"] = n5Paths / 2
    dictTimes["num_shortest_5paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_5paths"] = True

    # when k = 6
    start = time.time()
    n6Paths = 0
    for node in G.nodes():
        tmp = list(listOfPLength[node][1].values())
        n6Paths += tmp.count(6)
    dictSums["num_shortest_6paths"] = n6Paths / 2
    dictTimes["num_shortest_6paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_6paths"] = True

    # Size of the minimum (weight) node dominating set:
    # A subset of nodes where each node not in the subset has for direct
    # neighbor a node of the dominating set.
    start = time.time()
    T = approximation.min_weighted_dominating_set(G)
    dictSums["size_min_node_dom_set"] = len(T)
    dictTimes["size_min_node_dom_set"] = time.time() - start
    dictIsDisc["size_min_node_dom_set"] = True

    # Idem but with the edge dominating set
    start = time.time()
    T = approximation.min_edge_dominating_set(G)
    dictSums["size_min_edge_dom_set"] = 2 * len(T)  # times 2 to have a number of nodes
    dictTimes["size_min_edge_dom_set"] = time.time() - start
    dictIsDisc["size_min_edge_dom_set"] = True

    # The Wiener index of a graph is the sum of the shortest-path distances
    # between each pair of reachable nodes. For pairs of nodes in undirected graphs,
    # only one orientation of the pair is counted.
    # (On LCC, otherwise inf)
    start = time.time()
    dictSums["wiener_index_LCC"] = nx.wiener_index(G_lcc)
    dictTimes["wiener_index_LCC"] = time.time() - start
    dictIsDisc["wiener_index_LCC"] = True

    # Betweenness node centrality (averaged over nodes):
    # at node u it is defined as B_u = sum_i,j sigma(i,u,j)/sigma(i,j)
    # where sigma is the number of shortest paths between i and j going through u or not
    start = time.time()
    betweenness = list(nx.betweenness_centrality(G).values())
    time_betweenness = time.time() - start

    # Averaged across nodes
    start = time.time()
    dictSums["betweenness_centrality_mean"] = np.mean(betweenness)
    dictTimes["betweenness_centrality_mean"] = time.time() - start + time_betweenness
    dictIsDisc["betweenness_centrality_mean"] = False

    # Maximum across nodes
    start = time.time()
    dictSums["betweenness_centrality_max"] = max(betweenness)
    dictTimes["betweenness_centrality_max"] = time.time() - start + time_betweenness
    dictIsDisc["betweenness_centrality_max"] = False

    # Central point dominance
    # CPD = sum_u(B_max - B_u)/(N-1)
    start = time.time()
    dictSums["central_point_dominance"] = sum(
        max(betweenness) - np.array(betweenness)) / (len(betweenness) - 1)
    dictTimes["central_point_dominance"] = time.time() - start + time_betweenness
    dictIsDisc["central_point_dominance"] = False

    # Estrada index: sum_i^n exp(lambda_i)
    # with n the number of nodes, lambda_i the i-th eigenvalue of the adjacency matrix of G
    start = time.time()
    dictSums["Estrata_index"] = nx.estrada_index(G)
    dictTimes["Estrata_index"] = time.time() - start
    dictIsDisc["Estrata_index"] = False

    # Eigenvector centrality
    # For each node, it is the average eigenvector centrality of its neighbors,
    # where the centrality of node i is taken as the i-th coordinate of x
    # such that Ax = lambda*x (for the maximal eigenvalue)

    # Averaged
    start = time.time()
    dictSums["avg_eigenvec_centrality"] = np.mean(
        list(nx.eigenvector_centrality_numpy(G).values()))
    dictTimes["avg_eigenvec_centrality"] = time.time() - start
    dictIsDisc["avg_eigenvec_centrality"] = False

    # Maximum
    start = time.time()
    dictSums["max_eigenvec_centrality"] = max(
        list(nx.eigenvector_centrality_numpy(G).values()))
    dictTimes["max_eigenvec_centrality"] = time.time() - start
    dictIsDisc["max_eigenvec_centrality"] = False

    ### Noise generation ###

    # Noise simulated from a Normal(0,1) distribution
    start = time.time()
    dictSums["noise_Gauss"] = ss.norm.rvs(0, 1)
    dictTimes["noise_Gauss"] = time.time() - start
    dictIsDisc["noise_Gauss"] = False

    # Noise simulated from a Uniform distribution on [0, 50]
    start = time.time()
    dictSums["noise_Unif"] = ss.uniform.rvs(0, 50)
    dictTimes["noise_Unif"] = time.time() - start
    dictIsDisc["noise_Unif"] = False

    # Noise simulated from a Bernoulli B(0.5) distribution
    start = time.time()
    dictSums["noise_Bern"] = ss.bernoulli.rvs(0.5)
    dictTimes["noise_Bern"] = time.time() - start
dictIsDisc["noise_Bern"] = True # Noise simulated from a discrete uniform distribution [0,50[ start = time.time() dictSums["noise_disc_Unif"] = ss.randint.rvs(0, 50) dictTimes["noise_disc_Unif"] = time.time() - start dictIsDisc["noise_disc_Unif"] = True resDicts = (dictSums, dictTimes, dictIsDisc) return resDicts
def strategy_2(input_filename, num_seeds):
    x = time.clock()
    G = nx.Graph()
    degrees = []
    Gconnected = nx.Graph()
    #print("Graph loaded")
    for line in graph.items():
        node_id = int(line[0])
        neighbors = line[1]
        degrees.append(float(len(neighbors)))
        for neighbor in neighbors:
            G.add_edge(node_id, int(neighbor))
            Gconnected.add_edge(node_id, int(neighbor))
        if len(neighbors) == 0:
            G.add_node(node_id)
    dominating = apxa.min_weighted_dominating_set(G)
    #dominating = max(nx.connected_components(Gconnected), key= lambda x:len(x))
    complement = set(G.nodes()) - dominating
    print("1")
    centralities = nx.algorithms.centrality.closeness_centrality(G)
    #centralities = nx.algorithms.centrality.betweenness_centrality_subset(G, list(dominating), list(dominating - vc))
    centrality = []
    for node in dominating:
        centrality.append(centralities[node])
    # Normalize centralities for weighting with other heuristics
    centrality = np.array(centrality) / np.linalg.norm(centrality)
    print("2")
    centralities2 = nx.algorithms.centrality.betweenness_centrality_subset(
        G, list(dominating), list(complement))
    centrality2 = []
    for node in dominating:
        centrality2.append(centralities2[node])
    # Normalize centralities for weighting with other heuristics
    centrality2 = np.array(centrality2) / np.linalg.norm(centrality2)
    print("3")
    centralities3 = nx.algorithms.centrality.subgraph_centrality(G)
    centrality3 = []
    for node in dominating:
        centrality3.append(centralities3[node])
    # Normalize centralities for weighting with other heuristics
    centrality3 = np.array(centrality3) / np.linalg.norm(centrality3)
    print("4")
    result = []
    # for i in sorted_degrees[11:]:
    #     result.append(i[0])
    clusters = []
    for node in dominating:
        c = nx.algorithms.cluster.clustering(G, node)
        clusters.append(c)
    clusters = np.array(clusters) / np.linalg.norm(clusters)
    print("5")
    influence = {}
    for node, degree in enumerate(centrality):
        influence[node] = 5 * degree + 5 * centrality2[node] + 5 * centrality3[node] - 1. * clusters[node]
    result = sorted(influence.keys(), key=lambda x: influence[x])[-int(num_seeds):]
    print(time.clock() - x)
    dominating = list(dominating)
    return [str(dominating[i]) for i in result]
# diameter(b)
# This will work only for graphs that are connected
diameter = -1
if numberConnectedComponents == 1:
    diameter = nx.diameter(b)
    #print(diameter, sizeMaxClique)

# The maximum clique is returned as a set of nodes
# max_clique(b)
maxClique = naa.max_clique(b)
sizeMaxClique = len(maxClique)
print(diameter, sizeMaxClique)

# The dominating set is returned as a set of nodes
# min_weighted_dominating_set(b)
minDominatingSet = naa.min_weighted_dominating_set(b)
sizeMinDominatingSet = len(minDominatingSet)

# The number of maximal cliques in the graph
# graph_number_of_cliques(b)
numberOfCliques = nx.graph_number_of_cliques(b)

print(numberConnectedComponents, diameter, sizeMaxClique, sizeMinDominatingSet, numberOfCliques)
def get_dominating_sets(cls, G, weight=None):
    """Get an approximate minimum-weight dominating set of G."""
    dominating_sets = nxaa.min_weighted_dominating_set(G, weight=weight)

    return dominating_sets
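# A minimal usage sketch (added for illustration; the path graph and node
# weights are assumptions): min_weighted_dominating_set reads node weights from
# the named node attribute and greedily approximates a minimum-weight
# dominating set, so the cheap middle nodes are preferred here.
import networkx as nx
from networkx.algorithms import approximation as nxaa

G = nx.path_graph(5)
nx.set_node_attributes(G, {0: 5, 1: 1, 2: 1, 3: 1, 4: 5}, name="weight")
print(nxaa.min_weighted_dominating_set(G, weight="weight"))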
def helper(G, start):
    visited2 = []  # List to keep track of visited nodes.
    queue2 = []    # Initialize a queue

    def bfs_set_weights(visited, graph, node):
        visited2.append(node)
        queue2.append(node)
        node_weights = {}
        node_weights[node] = 1
        while queue2:
            s = queue2.pop(0)
            for neighbour in G.neighbors(s):
                if neighbour not in visited2:
                    visited2.append(neighbour)
                    queue2.append(neighbour)
                    node_weight = G.get_edge_data(neighbour, s)['weight']
                    node_weights[neighbour] = node_weight
        return node_weights

    node_weights_dict = bfs_set_weights(visited2, G, start)
    nx.set_node_attributes(G, node_weights_dict, 'node_weight')
    D = min_weighted_dominating_set(G, 'node_weight')
    for node2 in list(G.nodes):
        remove_attribute(G, node2, 'node_weight')

    visited = []  # List to keep track of visited nodes.
    queue = []    # Initialize a queue

    def bfs(visited, graph, node):
        visited.append(node)
        queue.append(node)
        level_tracker = {}
        level_tracker[node] = 0
        levels = {}
        levels[0] = [[node], [node, True]]
        while queue:
            s = queue.pop(0)
            for neighbour in G.neighbors(s):
                if neighbour not in visited:
                    visited.append(neighbour)
                    queue.append(neighbour)
                    level_count = level_tracker.get(s) + 1
                    level_tracker[neighbour] = level_count
                    if neighbour in D:
                        dom_set = True
                    else:
                        dom_set = False
                    if level_count in levels.keys():
                        if dom_set == True:
                            levels[level_count].append([neighbour, dom_set])
                            levels[level_count][0].append(neighbour)
                        else:
                            levels[level_count].append([neighbour, dom_set])
                    else:
                        if dom_set == True:
                            levels[level_count] = [[neighbour], [neighbour, dom_set]]
                        else:
                            levels[level_count] = [[], [neighbour, dom_set]]
        return levels

    bfs_levels = bfs(visited, G, start)
    #print(bfs_levels)
    #print(list(G.edges))
    #print('edge data ', G.get_edge_data(0,1,default=0)['weight'])
    leaf_level = max(bfs_levels.keys())
    level = leaf_level
    d_set_levels = []
    while level >= 0:
        if bfs_levels[level][0]:
            d_set = bfs_levels[level][0]
            d_set_levels.append(d_set)
        level = level - 1
    newG = nx.Graph()
    for i in range(len(d_set_levels) - 1):
        d_level = d_set_levels[i]
        level_above = d_set_levels[i + 1]
        for d_node in d_level:
            for possible_parent in level_above:
                if nx.has_path(G, d_node, possible_parent):
                    parent = possible_parent
                    path = nx.dijkstra_path(G, d_node, parent, 'weight')
                    path_sub = G.subgraph(path).copy()
                    newG.update(path_sub)
    #print(list(newG.edges))
    T = nx.minimum_spanning_tree(newG)
    if list(newG.edges) == []:
        T.add_node(d_set_levels[0][0])
        return T
    return T