Example #1
def solve(G):
    domset = min_weighted_dominating_set(G)
    # Rank dominating-set nodes by degree, highest first.
    ranklist = sorted(domset, key=lambda x: len(G[x]), reverse=True)

    graph3 = nx.Graph()
    special = chkspecialcase(G)
    if special:
        graph3.add_node(special)
        return graph3
    # Connect every pair of dominating-set nodes along an A* path in G,
    # copying the weights of the path edges into the new graph.
    for idx, u in enumerate(ranklist):
        for v in ranklist[idx + 1:]:
            if nx.has_path(G, u, v):
                path = nx.astar_path(G, u, v)
                for i in range(len(path) - 1):
                    graph3.add_edge(path[i], path[i + 1],
                                    weight=G[path[i]][path[i + 1]]['weight'])
    return graph3
Example #2
    def helper2(G):
        T = nx.minimum_spanning_tree(G)
        curr_lowest = average_pairwise_distance(T)
        curr_lowest_tree = T

        S = min_weighted_dominating_set(T)

        newG = nx.subgraph(T, S)

        ncc = nx.number_connected_components(newG)
        ccs = list(connected_components(newG))

        for i in range(len(ccs) - 1):
            # Peek an arbitrary node from each of the two components.
            curr_node = next(iter(ccs[i]))
            next_node = next(iter(ccs[i + 1]))
            path = nx.dijkstra_path(G, curr_node, next_node)

            for n in path:
                if (n not in list(newG.nodes)):
                    S.add(n)

            newG = nx.subgraph(G, S)
            newT = nx.minimum_spanning_tree(newG)
            if (is_valid_network(G, newT)):
                apd = average_pairwise_distance(newT)
                if (apd < curr_lowest):
                    curr_lowest = apd
                    curr_lowest_tree = newT

        return curr_lowest_tree
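Note: Examples #2, #12, and #16 call project helpers average_pairwise_distance and is_valid_network that are not defined in these snippets. As orientation only, here is a minimal sketch of what average_pairwise_distance presumably computes, assuming it averages weighted shortest-path distances over all unordered node pairs of T; the real helpers come from those projects' own utility code and may differ in details.

import itertools
import networkx as nx

def average_pairwise_distance_sketch(T):
    # Hedged sketch: mean weighted shortest-path distance over all
    # unordered node pairs of T (not the projects' actual helper).
    if T.number_of_nodes() < 2:
        return 0
    dist = dict(nx.all_pairs_dijkstra_path_length(T, weight="weight"))
    pairs = list(itertools.combinations(T.nodes, 2))
    return sum(dist[u][v] for u, v in pairs) / len(pairs)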
Example #3
def solve(G):
    """
    Directly calls the MST algorithm to link together the nodes of the dominating set. Only includes edges that connect two dominating-set vertices.
    Args:
        G: networkx.Graph

    Returns:
        T: networkx.Graph
    """
    temp = nx.Graph()
    dominatingSet = min_weighted_dominating_set(G, weight="weight")
    temp.add_nodes_from(dominatingSet)

    for node in dominatingSet:
        for node2 in dominatingSet:
            if G.has_edge(node, node2):
                temp.add_edge(node, node2,
                              weight=G[node][node2]['weight'])

    # Get MST of dominating set
    edges = list(
        nx.minimum_spanning_edges(temp,
                                  algorithm='kruskal',
                                  weight='weight',
                                  keys=True,
                                  data=True,
                                  ignore_nan=False))
    T = nx.Graph()
    T.add_nodes_from(dominatingSet)
    T.add_edges_from(edges)

    return T
Example #4
def dominating_set(num_seeds, G):
	max_set = min_weighted_dominating_set(G)
	# random.sample needs a sequence on Python 3.11+, so convert the sets to lists.
	seeds = random.sample(list(max_set), min(num_seeds, len(max_set)))
	if len(seeds) < num_seeds:
		seeds.extend(random.sample(list(set(G.nodes()) - set(seeds)), num_seeds - len(seeds)))
		# seeds = list(set(seeds))
	return seeds
Example #5
def get_dominating_set(G, x_vals, y_vals):
    vertices_1 = min_weighted_dominating_set(G)
    vertices_2 = dominating_set(G)
    # print("no of chosen vertices with min_weighted_dominating_set are ", len(vertices_1)) # "and they are ", vertices_1)
    # print("no of chosen vertices with dominating_set are ", len(vertices_2)) # "and they are ", vertices_2)
    # print("weight = ", G.get_edge_data(2,60))
    # compare_graph(vertices_1, vertices_2 , x_vals, y_vals)
    return len(vertices_2)
Example #6
	def mst_aprox(self, G, start):

		self.dominatingset = list(approximation.min_weighted_dominating_set(G, 'weight'))
		self.bell = dict(algo.shortest_paths.all_pairs_bellman_ford_path(G))
		self.intactset = self.dominatingset[:]
		self.current = self.start
		self.queue={}

		self.copydomi = self.dominatingset[:]
		self.shortest = 4000000000

		cal = 0
		while len(self.dominatingset) != 0:
			# Greedily pick the closest remaining dominating-set node.
			for i in self.dominatingset:
				self.queue[i] = [self.calculateweight(self.bell[self.current][i]), self.bell[self.current][i]]
			self.goal = min(self.queue, key=self.queue.get)
			cal += self.queue[self.goal][0]
			self.walk += self.queue[self.goal][1][1:]
			self.queue = {}
			self.current = self.goal
			self.dominatingset.remove(self.goal)
		self.walk += self.dijkreturn(self.current)[1:]
		minim = cal
		self.dominatingset= self.copydomi[:]
		self.permus = []

		if self.start in self.dominatingset:
			self.copydomi.remove(self.start)
			try:
				iterate = itertools.permutations(self.copydomi)
				for i in range(self.howmanyperms):
					self.permus += [next(iterate)]
			except StopIteration:
				iterate = itertools.permutations(self.copydomi)
				self.permus = list(iterate)

		else:
			try:
				iterate = itertools.permutations(self.copydomi)
				for i in range(self.howmanyperms):
					self.permus += [next(iterate)]
			except StopIteration:
				iterate = itertools.permutations(self.copydomi)
				self.permus = list(iterate)

		for i in self.permus:
			self.current = self.start
			cal = 0
			tempwalk =[]
			for k in i:
				cal += self.calculateweight(self.bell[self.current][k])
				tempwalk += self.bell[self.current][k][1:]
				self.current = k
			cal += self.calculateweight(self.bell[self.current][self.start])
			tempwalk += self.bell[self.current][self.start][1:]
			if minim > cal:
				self.walk = tempwalk[:]
				minim = cal
Example #7
def getDominatingSet(G):

    G2 = nx.Graph(G)
    dominating_set = approximation.min_weighted_dominating_set(G2)

    for dom in dominating_set:
        G.nodes[dom]["shape"] = "box"

    return dominating_set
Example #8
def greedy_mds(G):
    mds = min_weighted_dominating_set(G)

    final = G.copy()
    for node in G.copy():
        if node not in mds:
            final.remove_node(node)

    if not nx.is_connected(final):
        return mwrc_approx(G)
    return mwrc_approx(final)
Example #9
def dominating_set(num_seeds, G):
    max_set = min_weighted_dominating_set(G)
    print("GOT CLIQUE")
    seeds = random.sample(max_set, min(num_seeds, len(max_set)))
    print("GOT FIRST SAMPLE")

    if len(seeds) < num_seeds:
        seeds.extend(
            random.sample(set(G.nodes()) - set(seeds), num_seeds - len(seeds)))
        # seeds = list(set(seeds))
    print("GOT SECOND SAMPLE")
    return seeds
Example #10
    def test_star_graph(self):
        """Tests that an approximate dominating set for the star graph,
        even when the center node does not have the smallest integer
        label, gives just the center node.

        For more information, see #1527.

        """
        # Create a star graph in which the center node has the highest
        # label instead of the lowest.
        G = nx.star_graph(10)
        G = nx.relabel_nodes(G, {0: 9, 9: 0})
        eq_(min_weighted_dominating_set(G), {9})
Example #12
    def generate_random_new_way(self):
        og = min_weighted_dominating_set(self.graph, "weight")
        start_node = og.pop()
        holder = set()
        while og:
            curr_node = og.pop()
            holder.update(dijkstra_path(self.graph, start_node, curr_node))

        if holder and is_valid_network(self.graph,
                                       self.graph.subgraph(holder)):
            self.network = nx.minimum_spanning_tree(
                self.graph.subgraph(holder))
        else:
            self.generate_random_old_way(high_degree=True)
Example #13
    def test_min_weighted_dominating_set(self):
        graph = nx.Graph()
        graph.add_edge(1, 2)
        graph.add_edge(1, 5)
        graph.add_edge(2, 3)
        graph.add_edge(2, 5)
        graph.add_edge(3, 4)
        graph.add_edge(3, 6)
        graph.add_edge(5, 6)

        vertices = set([1, 2, 3, 4, 5, 6])
        # due to ties, this might be hard to test tight bounds
        dom_set = min_weighted_dominating_set(graph)
        for vertex in vertices - dom_set:
            neighbors = set(graph.neighbors(vertex))
            ok_(len(neighbors & dom_set) > 0, "Non dominating set found!")
Example #14
    def test_min_weighted_dominating_set(self):
        graph = nx.Graph()
        graph.add_edge(1, 2)
        graph.add_edge(1, 5)
        graph.add_edge(2, 3)
        graph.add_edge(2, 5)
        graph.add_edge(3, 4)
        graph.add_edge(3, 6)
        graph.add_edge(5, 6)

        vertices = set([1, 2, 3, 4, 5, 6])
        # due to ties, this might be hard to test tight bounds
        dom_set = apxa.min_weighted_dominating_set(graph)
        for vertex in vertices - dom_set:
            neighbors = set(graph.neighbors(vertex))
            ok_(len(neighbors & dom_set) > 0, "Non dominating set found!")
Example #15
def strategy_4(input_filename, num_seeds):
	x = time.perf_counter()  # time.clock() was removed in Python 3.8
	G = nx.Graph()
	degrees = {}
	Gconnected = nx.Graph()
	#print("Graph loaded")
	for line in graph.items():
		node_id = int(line[0])
		neighbors = line[1]
		degrees[node_id] = len(neighbors)
		for neighbor in neighbors:
			G.add_edge(node_id, int(neighbor))
			Gconnected.add_edge(node_id, int(neighbor))
		if (len(neighbors) ==0):
			G.add_node(node_id)
 	
	dominating = apxa.min_weighted_dominating_set(G)
	#dominating = max(nx.connected_components(Gconnected), key= lambda x:len(x))
	complement = set(G.nodes()) - dominating
	#print(nx.number_of_nodes(G))
	#dominating = set(random.sample(G.nodes(), .9*nx.number_of_nodes(G)))
	#dominating = set(G.nodes())
	print("Dominating done")
	ha = time.clock()
	centralities = nx.algorithms.centrality.subgraph_centrality(G)
	print("subgraph done: " + str(time.clock()-ha))
	whoa = time.clock()
	centralities2 = nx.algorithms.centrality.betweenness_centrality_subset(G, list(dominating), list(complement))
	#centralities2 = nx.algorithms.centrality.closeness_centrality(G)
	#centralities2 = nx.algorithms.centrality.degree_centrality(G)
	print("Degree done:" + str(time.clock()-whoa))
	#centralities3 = nx.algorithms.centrality.harmonic_centrality(G, nbunch=dominating)
	ay = time.clock()
	centralities3 = nx.algorithms.centrality.eigenvector_centrality(G, max_iter=150, tol=1.0*10**-6)
	print("Harmonic done: " + str(time.clock() - ay))

	welp = time.perf_counter()
	clusters = {}
	for node in dominating:
		c = nx.algorithms.cluster.clustering(G, node)
		clusters[node] = c

	print("Cluster done: " + str(time.perf_counter() - welp))
	lol = time.perf_counter()
	influence = {}
	for node in dominating:
		influence[node] = calc_centrality2(G, node, centralities, centralities2, centralities3, clusters, dominating)
	print("Influence done: " + str(time.perf_counter() - lol))
	# Normalize degrees for weighting with other heuristics
	# centrality = np.array(centrality) / np.linalg.norm(centrality) 

	sorted_inf = sorted(influence.keys(), key=lambda x : influence[x])
	result = sorted_inf[-int(num_seeds):]
	# degrees = nx.algorithms.centrality.degree_centrality(G)
	# max_degrees = sorted(degrees.keys(), key=lambda x: degrees[x])[-int(num_seeds)+a:]
	# count = 1
	# done = False
	# while(not done):
	# 	changed = False
	# 	for node in result:
	# 		if node in max_degrees:
	# 			result.remove(node)
	# 			result.append(sorted_inf[-int(num_seeds)-count])
	# 			count += 1
	# 			changed = True
	# 	if not changed: 
	# 		done = True

	print(time.perf_counter() - x)
	# dominating = list(dominating)
	# max_degrees.extend(result[-a:])
	return [str(i) for i in result]
Example #16
def solve(G):
    """
    Idea for the solve method below: find shortest paths (via Dijkstra's) between each pair of vertices in the dominating set.
    Put all the vertices of the dominating set into a new graph G_prime, and add edges between each pair of vertices, introducing new
    vertices into the graph as necessary to ensure connectivity. In the end, return the MST of G_prime, after an extra-vertex optimization pass.

    Args:
        G: networkx.Graph

    Returns:
        T: networkx.Graph
    """

    # TODO: your code here!
    dominatingSet = min_weighted_dominating_set(G, weight="weight")

    # The variable defined below, apsp, stands for all pairs shortest paths from calling NetworkX built-in Dijkstra's algorithm.
    apsp = dict(
        nx.algorithms.shortest_paths.weighted.all_pairs_dijkstra(
            G, weight="weight"))

    # G_prime, the new graph below, shall consist of all vertices within the dominating set along with their shortest path edge weights in
    # between, bringing in new vertices as necessary.
    G_prime = nx.Graph()
    G_prime.add_nodes_from(dominatingSet)

    # Vertices to add contains new vertices which must be added into graph G prime in order to ensure connectivity of nodes from min
    # dominating set.
    extra_vertices = set()

    for node in dominatingSet:
        for node2 in dominatingSet:
            shortest_path = apsp[node][1][node2]
            # First, identify new vertices to be thrown into G prime.
            for vertex in shortest_path:
                if vertex not in dominatingSet:
                    # Adding a node that already exists has no effect.
                    G_prime.add_node(vertex)
                    # Keep track of the extra vertices outside the dominating set.
                    extra_vertices.add(vertex)
            # Next, identify new edges to be thrown into G prime. Adding edges more than once has no effect.
            for i in range(len(shortest_path) - 1):
                origin_vertex = shortest_path[i]
                terminus_vertex = shortest_path[i + 1]
                w = G.get_edge_data(origin_vertex, terminus_vertex)['weight']
                G_prime.add_edge(origin_vertex, terminus_vertex, weight=w)

    final_edges = list(
        nx.minimum_spanning_edges(G_prime,
                                  algorithm='kruskal',
                                  weight='weight',
                                  keys=True,
                                  data=True,
                                  ignore_nan=False))

    T = nx.Graph()
    T.add_nodes_from(dominatingSet)
    T.add_nodes_from(extra_vertices)
    T.add_edges_from(final_edges)

    current_average = average_pairwise_distance(T)
    last_average = float("inf")
    print(current_average)

    # Until adding more edges doesn't improve the average pairwise cost
    while current_average < last_average:
        last_average = current_average
        # For every node in T
        # Materialize the node list first, since T is modified inside the loop.
        for node in list(nx.dfs_preorder_nodes(T, source=list(T.nodes)[0])):
            neighbors = nx.algorithms.traversal.breadth_first_search.bfs_tree(
                G, node, reverse=False, depth_limit=1)
            # Get one of its neighbors NOT in T
            for node2 in neighbors:
                # and add the edge between that vertex and its neighbor
                # if it decreases the average pairwise cost.
                if node2 not in T and G.get_edge_data(node, node2)\
                and G[node][node2]['weight'] < current_average:
                    T.add_node(node2)
                    T.add_edge(node,
                               node2,
                               weight=G.get_edge_data(node, node2)['weight'])
                    new_average = average_pairwise_distance(T)
                    if new_average > current_average:
                        T.remove_node(node2)
                        #T.remove_edge(node, node2)
                    else:
                        current_average = new_average
                        print("Adding an edge between", node, "and", node2,
                              "yields average", new_average)

    print("Dominating vertices:", [node for node in T])

    return T
Example #17
def compute_summaries(G):
    """ Compute network features, computational times and their nature.
    
    Evaluate 54 summary statistics of a network G, plus 4 noise variables,
    store the computational time to evaluate each summary statistic, and keep
    track of their nature (discrete or not).
        
        Args:
            G (networkx.classes.graph.Graph):
                an undirected networkx graph.
        
        Returns:
            resDicts (tuple): 
                a tuple containing the elements:
                - dictSums (dict): a dictionary with the name of the summaries
                as keys and the summary statistic values as values;
                - dictTimes (dict): a dictionary with the name of the summaries
                as keys and the time to compute each one as values;
                - dictIsDisc (dict): a dictionary indicating if the summary is
                discrete (True) or not (False).
                
    """

    dictSums = dict()  # Will store the summary statistic values
    dictTimes = dict()  # Will store the evaluation times
    dictIsDisc = dict()  # Will store the summary statistic nature

    # Extract the largest connected component
    Gcc = sorted(nx.connected_components(G), key=len, reverse=True)
    G_lcc = G.subgraph(Gcc[0])

    # Number of edges
    start = time.time()
    dictSums["num_edges"] = G.number_of_edges()
    dictTimes["num_edges"] = time.time() - start
    dictIsDisc["num_edges"] = True

    # Number of connected components
    start = time.time()
    dictSums["num_of_CC"] = nx.number_connected_components(G)
    dictTimes["num_of_CC"] = time.time() - start
    dictIsDisc["num_of_CC"] = True

    # Number of nodes in the largest connected component
    start = time.time()
    dictSums["num_nodes_LCC"] = nx.number_of_nodes(G_lcc)
    dictTimes["num_nodes_LCC"] = time.time() - start
    dictIsDisc["num_nodes_LCC"] = True

    # Number of edges in the largest connected component
    start = time.time()
    dictSums["num_edges_LCC"] = G_lcc.number_of_edges()
    dictTimes["num_edges_LCC"] = time.time() - start
    dictIsDisc["num_edges_LCC"] = True

    # Diameter of the largest connected component
    start = time.time()
    dictSums["diameter_LCC"] = nx.diameter(G_lcc)
    dictTimes["diameter_LCC"] = time.time() - start
    dictIsDisc["diameter_LCC"] = True

    # Average geodesic distance (shortest path length in the LCC)
    start = time.time()
    dictSums["avg_geodesic_dist_LCC"] = nx.average_shortest_path_length(G_lcc)
    dictTimes["avg_geodesic_dist_LCC"] = time.time() - start
    dictIsDisc["avg_geodesic_dist_LCC"] = False

    # Average degree of the neighborhood of each node
    start = time.time()
    dictSums["avg_deg_connectivity"] = np.mean(
        list(nx.average_degree_connectivity(G).values()))
    dictTimes["avg_deg_connectivity"] = time.time() - start
    dictIsDisc["avg_deg_connectivity"] = False

    # Average degree of the neighbors of each node in the LCC
    start = time.time()
    dictSums["avg_deg_connectivity_LCC"] = np.mean(
        list(nx.average_degree_connectivity(G_lcc).values()))
    dictTimes["avg_deg_connectivity_LCC"] = time.time() - start
    dictIsDisc["avg_deg_connectivity_LCC"] = False

    # Recover the degree distribution
    start_degree_extract = time.time()
    degree_vals = list(dict(G.degree()).values())
    degree_extract_time = time.time() - start_degree_extract

    # Entropy of the degree distribution
    start = time.time()
    dictSums["degree_entropy"] = ss.entropy(degree_vals)
    dictTimes["degree_entropy"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_entropy"] = False

    # Maximum degree
    start = time.time()
    dictSums["degree_max"] = max(degree_vals)
    dictTimes["degree_max"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_max"] = True

    # Average degree
    start = time.time()
    dictSums["degree_mean"] = np.mean(degree_vals)
    dictTimes["degree_mean"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_mean"] = False

    # Median degree
    start = time.time()
    dictSums["degree_median"] = np.median(degree_vals)
    dictTimes["degree_median"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_median"] = False

    # Standard deviation of the degree distribution
    start = time.time()
    dictSums["degree_std"] = np.std(degree_vals)
    dictTimes["degree_std"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_std"] = False

    # Quantile 25%
    start = time.time()
    dictSums["degree_q025"] = np.quantile(degree_vals, 0.25)
    dictTimes["degree_q025"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_q025"] = False

    # Quantile 75%
    start = time.time()
    dictSums["degree_q075"] = np.quantile(degree_vals, 0.75)
    dictTimes["degree_q075"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_q075"] = False

    # Average geodesic distance
    start = time.time()
    dictSums["avg_shortest_path_length_LCC"] = nx.average_shortest_path_length(
        G_lcc)
    dictTimes["avg_shortest_path_length_LCC"] = time.time() - start
    dictIsDisc["avg_shortest_path_length_LCC"] = False

    # Average global efficiency:
    # The efficiency of a pair of nodes in a graph is the multiplicative
    # inverse of the shortest path distance between the nodes.
    # The average global efficiency of a graph is the average efficiency of
    # all pairs of nodes.
    start = time.time()
    dictSums["avg_global_efficiency"] = nx.global_efficiency(G)
    dictTimes["avg_global_efficiency"] = time.time() - start
    dictIsDisc["avg_global_efficiency"] = False

    # Harmonic mean of shortest-path distances, which is 1/avg_global_efficiency
    start = time.time()
    glob_eff = nx.global_efficiency(G)
    dictSums["harmonic_mean"] = 1.0 / glob_eff if glob_eff > 0 else np.inf
    dictTimes["harmonic_mean"] = time.time() - start
    dictIsDisc["harmonic_mean"] = False

    # Average local efficiency
    # The local efficiency of a node in the graph is the average global
    # efficiency of the subgraph induced by the neighbors of the node.
    # The average local efficiency is the average of the
    # local efficiencies of each node.
    start = time.time()
    dictSums["avg_local_efficiency_LCC"] = nx.local_efficiency(G_lcc)
    dictTimes["avg_local_efficiency_LCC"] = time.time() - start
    dictIsDisc["avg_local_efficiency_LCC"] = False

    # Node connectivity
    # The node connectivity is equal to the minimum number of nodes that
    # must be removed to disconnect G or render it trivial.
    # Only on the largest connected component here.
    start = time.time()
    dictSums["node_connectivity_LCC"] = nx.node_connectivity(G_lcc)
    dictTimes["node_connectivity_LCC"] = time.time() - start
    dictIsDisc["node_connectivity_LCC"] = True

    # Edge connectivity
    # The edge connectivity is equal to the minimum number of edges that
    # must be removed to disconnect G or render it trivial.
    # Only on the largest connected component here.
    start = time.time()
    dictSums["edge_connectivity_LCC"] = nx.edge_connectivity(G_lcc)
    dictTimes["edge_connectivity_LCC"] = time.time() - start
    dictIsDisc["edge_connectivity_LCC"] = True

    # Graph transitivity:
    # 3 times the number of triangles divided by the number of triads
    start = time.time()
    dictSums["transitivity"] = nx.transitivity(G)
    dictTimes["transitivity"] = time.time() - start
    dictIsDisc["transitivity"] = False

    # Number of triangles
    start = time.time()
    dictSums["num_triangles"] = np.sum(list(nx.triangles(G).values())) / 3
    dictTimes["num_triangles"] = time.time() - start
    dictIsDisc["num_triangles"] = True

    # Estimate of the average clustering coefficient of G:
    # Average local clustering coefficient, with local clustering coefficient
    # defined as C_i = (nbr of pairs of neighbors of i that are connected)/(nbr of pairs of neighbors of i)
    start = time.time()
    dictSums["avg_clustering_coef"] = nx.average_clustering(G)
    dictTimes["avg_clustering_coef"] = time.time() - start
    dictIsDisc["avg_clustering_coef"] = False

    # Square clustering (averaged over nodes):
    # the fraction of possible squares that exist at the node.

    # We average it over nodes
    start = time.time()
    dictSums["square_clustering_mean"] = np.mean(
        list(nx.square_clustering(G).values()))
    dictTimes["square_clustering_mean"] = time.time() - start
    dictIsDisc["square_clustering_mean"] = False

    # We compute the median
    start = time.time()
    dictSums["square_clustering_median"] = np.median(
        list(nx.square_clustering(G).values()))
    dictTimes["square_clustering_median"] = time.time() - start
    dictIsDisc["square_clustering_median"] = False

    # We compute the standard deviation
    start = time.time()
    dictSums["square_clustering_std"] = np.std(
        list(nx.square_clustering(G).values()))
    dictTimes["square_clustering_std"] = time.time() - start
    dictIsDisc["square_clustering_std"] = False

    # Number of 2-cores
    start = time.time()
    dictSums["num_2cores"] = len(nx.k_core(G, k=2))
    dictTimes["num_2cores"] = time.time() - start
    dictIsDisc["num_2cores"] = True

    # Number of 3-cores
    start = time.time()
    dictSums["num_3cores"] = len(nx.k_core(G, k=3))
    dictTimes["num_3cores"] = time.time() - start
    dictIsDisc["num_3cores"] = True

    # Number of 4-cores
    start = time.time()
    dictSums["num_4cores"] = len(nx.k_core(G, k=4))
    dictTimes["num_4cores"] = time.time() - start
    dictIsDisc["num_4cores"] = True

    # Number of 5-cores
    start = time.time()
    dictSums["num_5cores"] = len(nx.k_core(G, k=5))
    dictTimes["num_5cores"] = time.time() - start
    dictIsDisc["num_5cores"] = True

    # Number of 6-cores
    start = time.time()
    dictSums["num_6cores"] = len(nx.k_core(G, k=6))
    dictTimes["num_6cores"] = time.time() - start
    dictIsDisc["num_6cores"] = True

    # Number of k-shells
    # The k-shell is the subgraph induced by nodes with core number k.
    # That is, nodes in the k-core that are not in the k+1-core

    # Number of 2-shells
    start = time.time()
    dictSums["num_2shells"] = len(nx.k_shell(G, 2))
    dictTimes["num_2shells"] = time.time() - start
    dictIsDisc["num_2shells"] = True

    # Number of 3-shells
    start = time.time()
    dictSums["num_3shells"] = len(nx.k_shell(G, 3))
    dictTimes["num_3shells"] = time.time() - start
    dictIsDisc["num_3shells"] = True

    # Number of 4-shells
    start = time.time()
    dictSums["num_4shells"] = len(nx.k_shell(G, 4))
    dictTimes["num_4shells"] = time.time() - start
    dictIsDisc["num_4shells"] = True

    # Number of 5-shells
    start = time.time()
    dictSums["num_5shells"] = len(nx.k_shell(G, 5))
    dictTimes["num_5shells"] = time.time() - start
    dictIsDisc["num_5shells"] = True

    # Number of 6-shells
    start = time.time()
    dictSums["num_6shells"] = len(nx.k_shell(G, 6))
    dictTimes["num_6shells"] = time.time() - start
    dictIsDisc["num_6shells"] = True

    start = time.time()
    listOfCliques = list(nx.enumerate_all_cliques(G))
    enum_all_cliques_time = time.time() - start

    # Number of 4-cliques
    start = time.time()
    n4Clique = 0
    for li in listOfCliques:
        if len(li) == 4:
            n4Clique += 1
    dictSums["num_4cliques"] = n4Clique
    dictTimes["num_4cliques"] = time.time() - start + enum_all_cliques_time
    dictIsDisc["num_4cliques"] = True

    # Number of 5-cliques
    start = time.time()
    n5Clique = 0
    for li in listOfCliques:
        if len(li) == 5:
            n5Clique += 1
    dictSums["num_5cliques"] = n5Clique
    dictTimes["num_5cliques"] = time.time() - start + enum_all_cliques_time
    dictIsDisc["num_5cliques"] = True

    # Maximal size of a clique in the graph
    start = time.time()
    dictSums["max_clique_size"] = len(approximation.clique.max_clique(G))
    dictTimes["max_clique_size"] = time.time() - start
    dictIsDisc["max_clique_size"] = True

    # Approximated size of a large clique in the graph
    start = time.time()
    dictSums["large_clique_size"] = approximation.large_clique_size(G)
    dictTimes["large_clique_size"] = time.time() - start
    dictIsDisc["large_clique_size"] = True

    # Number of shortest paths of length k
    start = time.time()
    # Keyed by node so that arbitrary (non-integer) node labels work below.
    pathLengths = dict(nx.shortest_path_length(G))
    path_length_time = time.time() - start

    # when k = 3
    start = time.time()
    n3Paths = 0
    for node in G.nodes():
        tmp = list(pathLengths[node].values())
        n3Paths += tmp.count(3)
    dictSums["num_shortest_3paths"] = n3Paths / 2
    dictTimes["num_shortest_3paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_3paths"] = True

    # when k = 4
    start = time.time()
    n4Paths = 0
    for node in G.nodes():
        tmp = list(pathLengths[node].values())
        n4Paths += tmp.count(4)
    dictSums["num_shortest_4paths"] = n4Paths / 2
    dictTimes["num_shortest_4paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_4paths"] = True

    # when k = 5
    start = time.time()
    n5Paths = 0
    for node in G.nodes():
        tmp = list(pathLengths[node].values())
        n5Paths += tmp.count(5)
    dictSums["num_shortest_5paths"] = n5Paths / 2
    dictTimes["num_shortest_5paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_5paths"] = True

    # when k = 6
    start = time.time()
    n6Paths = 0
    for node in G.nodes():
        tmp = list(pathLengths[node].values())
        n6Paths += tmp.count(6)
    dictSums["num_shortest_6paths"] = n6Paths / 2
    dictTimes["num_shortest_6paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_6paths"] = True

    # Size of the minimum (weight) node dominating set:
    # A subset of nodes where each node not in the subset has for direct
    # neighbor a node of the dominating set.
    start = time.time()
    T = approximation.min_weighted_dominating_set(G)
    dictSums["size_min_node_dom_set"] = len(T)
    dictTimes["size_min_node_dom_set"] = time.time() - start
    dictIsDisc["size_min_node_dom_set"] = True

    # Idem but with the edge dominating set
    start = time.time()
    T = approximation.min_edge_dominating_set(G)
    dictSums["size_min_edge_dom_set"] = 2 * len(
        T)  # times 2 to have a number of nodes
    dictTimes["size_min_edge_dom_set"] = time.time() - start
    dictIsDisc["size_min_edge_dom_set"] = True

    # The Wiener index of a graph is the sum of the shortest-path distances
    # between each pair of reachable nodes. For pairs of nodes in undirected graphs,
    # only one orientation of the pair is counted.
    # (On LCC otherwise inf)
    start = time.time()
    dictSums["wiener_index_LCC"] = nx.wiener_index(G_lcc)
    dictTimes["wiener_index_LCC"] = time.time() - start
    dictIsDisc["wiener_index_LCC"] = True

    # Betweenness node centrality (averaged over nodes):
    # at node u it is defined as B_u = sum_i,j sigma(i,u,j)/sigma(i,j)
    # where sigma is the number of shortest path between i and j going through u or not

    start = time.time()
    betweenness = list(nx.betweenness_centrality(G).values())
    time_betweenness = time.time() - start

    # Averaged across nodes
    start = time.time()
    dictSums["betweenness_centrality_mean"] = np.mean(betweenness)
    dictTimes["betweenness_centrality_mean"] = time.time(
    ) - start + time_betweenness
    dictIsDisc["betweenness_centrality_mean"] = False

    # Maximum across nodes
    start = time.time()
    dictSums["betweenness_centrality_max"] = max(betweenness)
    dictTimes["betweenness_centrality_max"] = time.time(
    ) - start + time_betweenness
    dictIsDisc["betweenness_centrality_max"] = False

    # Central point dominance
    # CPD = sum_u(B_max - B_u)/(N-1)
    start = time.time()
    dictSums["central_point_dominance"] = sum(
        max(betweenness) - np.array(betweenness)) / (len(betweenness) - 1)
    dictTimes["central_point_dominance"] = time.time(
    ) - start + time_betweenness
    dictIsDisc["central_point_dominance"] = False

    # Estrada index: sum_i^n exp(lambda_i),
    # with n the number of nodes and lambda_i the i-th eigenvalue of the adjacency matrix of G
    start = time.time()
    dictSums["Estrada_index"] = nx.estrada_index(G)
    dictTimes["Estrada_index"] = time.time() - start
    dictIsDisc["Estrada_index"] = False

    # Eigenvector centrality
    # The centrality of node i is the i-th coordinate of x
    # such that Ax = lambda*x (for the maximal eigenvalue), so each node's
    # centrality is proportional to the sum of its neighbors' centralities.

    # Averaged
    start = time.time()
    dictSums["avg_eigenvec_centrality"] = np.mean(
        list(nx.eigenvector_centrality_numpy(G).values()))
    dictTimes["avg_eigenvec_centrality"] = time.time() - start
    dictIsDisc["avg_eigenvec_centrality"] = False

    # Maximum
    start = time.time()
    dictSums["max_eigenvec_centrality"] = max(
        list(nx.eigenvector_centrality_numpy(G).values()))
    dictTimes["max_eigenvec_centrality"] = time.time() - start
    dictIsDisc["max_eigenvec_centrality"] = False

    ### Noise generation ###

    # Noise simulated from a Normal(0,1) distribution
    start = time.time()
    dictSums["noise_Gauss"] = ss.norm.rvs(0, 1)
    dictTimes["noise_Gauss"] = time.time() - start
    dictIsDisc["noise_Gauss"] = False

    # Noise simulated from a Uniform distribution [0-50]
    start = time.time()
    dictSums["noise_Unif"] = ss.uniform.rvs(0, 50)
    dictTimes["noise_Unif"] = time.time() - start
    dictIsDisc["noise_Unif"] = False

    # Noise simulated from a Bernoulli B(0.5) distribution
    start = time.time()
    dictSums["noise_Bern"] = ss.bernoulli.rvs(0.5)
    dictTimes["noise_Bern"] = time.time() - start
    dictIsDisc["noise_Bern"] = True

    # Noise simulated from a discrete uniform distribution [0,50[
    start = time.time()
    dictSums["noise_disc_Unif"] = ss.randint.rvs(0, 50)
    dictTimes["noise_disc_Unif"] = time.time() - start
    dictIsDisc["noise_disc_Unif"] = True

    resDicts = (dictSums, dictTimes, dictIsDisc)

    return resDicts
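A quick, hypothetical way to exercise compute_summaries above, assuming the imports its body relies on (networkx as nx, numpy as np, scipy.stats as ss, time, and networkx.algorithms.approximation as approximation) are already in scope:

import networkx as nx

# Small random graph; the seed is fixed only so this sketch is reproducible.
G_demo = nx.erdos_renyi_graph(30, 0.2, seed=0)
sums, times, is_disc = compute_summaries(G_demo)
print(sums["size_min_node_dom_set"], times["size_min_node_dom_set"])
print(is_disc["avg_clustering_coef"])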
Example #18
def strategy_2(input_filename, num_seeds):
	x = time.perf_counter()  # time.clock() was removed in Python 3.8
	G = nx.Graph()
	degrees = []
	Gconnected = nx.Graph()
	#print("Graph loaded")
	for line in graph.items():
		node_id = int(line[0])
		neighbors = line[1]
		degrees.append(float(len(neighbors)))
		for neighbor in neighbors:
			G.add_edge(node_id, int(neighbor))
			Gconnected.add_edge(node_id, int(neighbor))
		if (len(neighbors) ==0):
			G.add_node(node_id)

	dominating = apxa.min_weighted_dominating_set(G)
	#dominating = max(nx.connected_components(Gconnected), key= lambda x:len(x))
	complement = set(G.nodes()) - dominating
	print("1")
	centralities = nx.algorithms.centrality.closeness_centrality(G)
	#centralities = nx.algorithms.centrality.betweenness_centrality_subset(G, list(dominating), list(dominating - vc))
	centrality = []
	for node in dominating:
		centrality.append(centralities[node])

	# Normalize degrees for weighting with other heuristics
	centrality = np.array(centrality) / np.linalg.norm(centrality) 
	print("2")
	centralities2 = nx.algorithms.centrality.betweenness_centrality_subset(G, list(dominating), list(complement))
	centrality2 = []
	for node in dominating:
		centrality2.append(centralities2[node])
	
	# Normalize degrees for weighting with other heuristics
	centrality2 = np.array(centrality2) / np.linalg.norm(centrality2) 
	print("3")
	centralities3 = nx.algorithms.centrality.subgraph_centrality(G)
	centrality3 = []
	for node in dominating:
		centrality3.append(centralities3[node])

	# Normalize degrees for weighting with other heuristics
	centrality3 = np.array(centrality3) / np.linalg.norm(centrality3)
	print("4")
	result = []
	# for i in sorted_degrees[11:]:
	# 	result.append(i[0])
	clusters = []
	for node in dominating:
		c = nx.algorithms.cluster.clustering(G, node)
		clusters.append(c)

	clusters = np.array(clusters) / np.linalg.norm(clusters)
	print("5")
	influence = {}
	for node, degree in enumerate(centrality):
		influence[node] = 5*degree + 5*centrality2[node] + 5*centrality3[node] - 1.* clusters[node]

	result = sorted(influence.keys(), key=lambda x : influence[x])[-int(num_seeds):]
	print(time.perf_counter() - x)
	dominating = list(dominating)
	return [str(dominating[i]) for i in result]


Example #19
# diameter(b)
# This will work only for graphs that are connected
diameter = -1
if numberConnectedComponents == 1:
    diameter = nx.diameter(b)

#print(diameter, sizeMaxClique)


# The maximum clique is returned as a set of nodes
# max_clique(b)
maxClique = naa.max_clique(b)
sizeMaxClique = len(maxClique)

print (diameter, sizeMaxClique)

# The dominating set is returned as a set of nodes
# min_weighted_dominating_set(b)
minDominatingSet = naa.min_weighted_dominating_set(b)
sizeMinDominatingSet = len(minDominatingSet)

# The number of maximal cliques in the graph 
# graph_number_of_cliques(b)
numberOfCliques = nx.graph_number_of_cliques(b)


print (numberConnectedComponents,diameter,sizeMaxClique,sizeMinDominatingSet,numberOfCliques)
Example #20
    def get_dominating_sets(cls, G, weight=None):
        """get a dominating sets 
		"""
        dominating_sets = nxaa.min_weighted_dominating_set(G, weight=weight)

        return dominating_sets
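For context on the call these wrappers make: min_weighted_dominating_set reads node weights from the node attribute named by its weight argument (every node counts as weight 1 when it is None), and every node outside the returned set has a neighbor inside it. A minimal sketch, with the graph and attribute name chosen purely for illustration:

import networkx as nx
from networkx.algorithms.approximation import min_weighted_dominating_set

G = nx.path_graph(5)
# Store node weights as a node attribute; its name is passed via `weight`.
nx.set_node_attributes(G, {n: n + 1 for n in G}, "weight")

D = min_weighted_dominating_set(G, weight="weight")

# Sanity check: every node is in D or adjacent to some node in D.
assert all(n in D or any(v in D for v in G[n]) for n in G)
print(D)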
Example #21
    def helper(G, start):

        visited2 = []  # List to keep track of visited nodes.
        queue2 = []  #Initialize a queue

        def bfs_set_weights(visited, graph, node):
            visited2.append(node)
            queue2.append(node)

            node_weights = {}
            node_weights[node] = 1

            while queue2:
                s = queue2.pop(0)
                for neighbour in G.neighbors(s):
                    if neighbour not in visited2:
                        visited2.append(neighbour)
                        queue2.append(neighbour)
                        node_weight = G.get_edge_data(neighbour, s)['weight']
                        node_weights[neighbour] = node_weight
            return node_weights

        node_weights_dict = bfs_set_weights(visited2, G, start)
        nx.set_node_attributes(G, node_weights_dict, 'node_weight')
        D = min_weighted_dominating_set(G, 'node_weight')
        for node2 in list(G.nodes):
            remove_attribute(G, node2, 'node_weight')

        visited = []  # List to keep track of visited nodes.
        queue = []  #Initialize a queue

        def bfs(visited, graph, node):
            visited.append(node)
            queue.append(node)
            level_tracker = {}
            level_tracker[node] = 0

            levels = {}
            levels[0] = [[node], [node, True]]

            while queue:
                s = queue.pop(0)

                for neighbour in G.neighbors(s):
                    if neighbour not in visited:
                        visited.append(neighbour)
                        queue.append(neighbour)
                        level_count = level_tracker.get(s) + 1
                        level_tracker[neighbour] = level_count
                        if neighbour in D:
                            dom_set = True
                        else:
                            dom_set = False
                        if level_count in levels.keys():
                            if dom_set == True:
                                levels[level_count].append(
                                    [neighbour, dom_set])
                                levels[level_count][0].append(neighbour)
                            else:
                                levels[level_count].append(
                                    [neighbour, dom_set])
                        else:
                            if dom_set == True:
                                levels[level_count] = [[neighbour],
                                                       [neighbour, dom_set]]
                            else:
                                levels[level_count] = [[],
                                                       [neighbour, dom_set]]

            return levels

        bfs_levels = bfs(visited, G, start)
        #print(bfs_levels)
        #print(list(G.edges))
        #print('edge data ', G.get_edge_data(0,1,default=0)['weight'])

        leaf_level = max(bfs_levels.keys())

        level = leaf_level
        d_set_levels = []
        while level >= 0:
            if bfs_levels[level][0]:
                d_set = bfs_levels[level][0]

                d_set_levels.append(d_set)
            level = level - 1

        newG = nx.Graph()
        for i in range(len(d_set_levels) - 1):
            d_level = d_set_levels[i]
            level_above = d_set_levels[i + 1]
            for d_node in d_level:
                for possible_parent in level_above:
                    if nx.has_path(G, d_node, possible_parent):
                        parent = possible_parent
                        path = nx.dijkstra_path(G, d_node, parent, 'weight')
                        path_sub = G.subgraph(path).copy()
                        newG.update(path_sub)
        #print(list(newG.edges))

        T = nx.minimum_spanning_tree(newG)
        if list(newG.edges) == []:
            T.add_node(d_set_levels[0][0])
            return T
        return T