Esempio n. 1
0
def naively_expand_query_nodes(graph_db, query_nodes, a = 1):
	"""Expand the query nodes breadth-first for a fixed number of rounds.

	Starting from ``query_nodes``, every node on the current frontier is
	looked up with ``graph_db.get_neighbors`` and the returned edges are
	added to the graph. The frontier for the next round is every node in
	the graph that has not been looked up yet. At most ``a + 1`` rounds
	are performed.

	Args:
		graph_db: object exposing ``get_neighbors(node)``; a falsy result
			(``None`` or an empty collection) adds no edges.
		query_nodes: collection of hashable nodes to start from.
		a: expansion parameter; the number of rounds is ``a + 1``.

	Returns:
		A ``(graph, api_calls)`` tuple, where ``api_calls`` is the number
		of ``get_neighbors`` calls performed. For empty ``query_nodes`` the
		result is an empty graph and ``0`` calls.
	"""
	graph = nx.Graph()
	if len(query_nodes) == 0:
		# Keep the (graph, api_calls) shape on empty input so callers can
		# always unpack the result the same way.
		return (graph, 0)

	for node in query_nodes:
		graph.add_node(node)

	api_calls = 0
	budget = a + 1
	opened_nodes = set()
	frontier = set(query_nodes)

	step = 0
	while step < budget and frontier:
		for node in frontier:
			neighbors = graph_db.get_neighbors(node)
			if neighbors:
				graph.add_edges_from((node, nbor) for nbor in neighbors)
		# One lookup was issued per frontier node, whether or not it
		# returned any neighbors.
		api_calls += len(frontier)
		opened_nodes |= frontier
		# Everything discovered so far that has not been looked up yet.
		frontier = set(graph.nodes()) - opened_nodes
		step += 1

	return (graph, api_calls)
Esempio n. 2
0
def expand_query_nodes(graph_db, query_nodes, a = 1):
	"""Expand the query nodes, letting ``_get_new_frontier`` pick each round.

	Every node on the current frontier is looked up with
	``graph_db.get_neighbors`` and the returned edges are added to the
	graph. The next frontier is then requested from ``_get_new_frontier``,
	which receives the graph, the set of already expanded nodes, the query
	nodes and the budget ``a + 1``. Expansion stops as soon as that helper
	returns an empty frontier.

	Args:
		graph_db: object exposing ``get_neighbors(node)``; a falsy result
			(``None`` or an empty collection) adds no edges.
		query_nodes: collection of hashable nodes to start from.
		a: expansion parameter; ``a + 1`` is passed on as the budget.

	Returns:
		A ``(graph, api_calls)`` tuple, where ``api_calls`` is the number
		of ``get_neighbors`` calls performed. For empty ``query_nodes`` the
		result is an empty graph and ``0`` calls.
	"""
	graph = nx.Graph()
	if len(query_nodes) == 0:
		# Keep the (graph, api_calls) shape on empty input so callers can
		# always unpack the result the same way.
		return (graph, 0)

	for node in query_nodes:
		graph.add_node(node)

	api_calls = 0
	budget = a + 1
	opened_nodes = set()
	frontier = _get_new_frontier(graph, opened_nodes, query_nodes, budget)

	while frontier:
		for node in frontier:
			neighbors = graph_db.get_neighbors(node)
			if neighbors:
				graph.add_edges_from((node, nbor) for nbor in neighbors)
		# One lookup was issued per frontier node, whether or not it
		# returned any neighbors.
		api_calls += len(frontier)

		opened_nodes |= set(frontier)
		frontier = _get_new_frontier(graph, opened_nodes, query_nodes, budget)

	return (graph, api_calls)
Esempio n. 3
0
def smart_expand(graph_db, query_nodes, a = 1, approx_threshold = 1.00, samples = 1):
	"""Grow a graph around the query nodes one component at a time.

	Each query node starts in its own ``Component``. In every round, each
	active component with pending nodes expands one node: its neighbors are
	fetched with ``graph_db.get_neighbors`` and added to the graph. A
	neighbor seen for the first time joins the expanding component; a
	neighbor that already belongs to a different component causes the two
	components to be merged. On a schedule driven by the number of lookups,
	the components are asked for partial solutions and an approximation
	ratio is recomputed from them.

	The loop stops when ``would_you_expand(components)`` is false, when only
	one component is left, or when the ratio reaches ``approx_threshold``.

	Args:
		graph_db: object exposing ``get_neighbors(node)``; ``None`` is
			treated as "no neighbors".
		query_nodes: collection of hashable nodes to start from.
		a: forwarded to ``Component.get_partial_solution``.
		approx_threshold: expansion continues only while the approximation
			ratio is strictly below this value.
		samples: forwarded to ``Component.get_partial_solution``.

	Returns:
		A ``(graph, api_calls)`` tuple, where ``api_calls`` is the number
		of ``get_neighbors`` calls performed.
	"""

	api_calls = 0
	# Incremented once per neighbor returned by a lookup (see the inner loop).
	num_of_edges = 0
	# The approximation ratio is re-evaluated once api_calls reaches this value.
	schedule = 64
	schedule_multiplier = 2

	# initialize graph
	graph = nx.Graph()
	if len(query_nodes) == 0:
		return (graph, 0)

	# we add query nodes
	for node in query_nodes:
		graph.add_node(node)

	# every node that has been assigned to some component so far
	
	nodes_seen = set(query_nodes)

	# nodeComponent maps each seen node to the component that currently owns it.
	nodeComponent = {}
	components    = []
	for q in query_nodes:
		comp = Component( set([q]) )
		components.append( comp )
		nodeComponent[q] = comp

	# Initial ratio is 1 / len(query_nodes).
	current_best = 1.0
	upper_bound = 1.0 * len(query_nodes)
	approx_ratio = current_best / upper_bound
	# print(approx_ratio, current_best, upper_bound)

	while would_you_expand(components) and len(components) > 1 and (approx_ratio < approx_threshold):
		# Components created by merges during this round.
		next_components = []

		for c in components:

			# Skip components merged away earlier in this round and
			# components with nothing left to expand.
			if not c.is_active() or len(c.nodes_to_expand) == 0:
				continue

			# Heap entries are (priority, node) pairs; take the node with the
			# smallest priority.
			lucky_node = heappop( c.nodes_to_expand )[1]
	
			# Every node on a heap must already be registered in nodeComponent.
			comp_of_lucky_node = nodeComponent.get(lucky_node, None)
			assert comp_of_lucky_node != None
		
			neighbors = graph_db.get_neighbors(lucky_node)
			if neighbors == None:
				neighbors = []
	
			api_calls += 1
			for nbor in neighbors:
				if not nbor in nodes_seen:
					# First time this node is seen: queue it for expansion with
					# a random priority and attach it to the current component.
					# NOTE(review): the push targets c's heap even if an earlier
					# neighbor in this loop merged c into a union and set it
					# inactive — confirm Component(a, b) shares or takes over
					# the heaps of its parts.
					heappush( c.nodes_to_expand, (random.random(), nbor) )
					nodes_seen.add(nbor)
					nodeComponent[nbor] = comp_of_lucky_node
					comp_of_lucky_node.add(nbor)
					comp_of_lucky_node.add_edge( lucky_node, nbor )
				else:
					comp_of_nbor = nodeComponent.get(nbor,None)
					assert comp_of_nbor != None
					if comp_of_lucky_node != comp_of_nbor:
						# The edge joins two different components: replace
						# both with a single merged component.
						# Presumably Component(a, b) builds the union of a and
						# b — verify against the Component class.
						union = Component( comp_of_lucky_node, comp_of_nbor )
						union.add_edge( lucky_node, nbor )
						comp_of_lucky_node.set_inactive()
						comp_of_nbor.set_inactive()
						next_components.append(union)

						# Re-point every node of the merged component
						# (union.S is presumably its node set — TODO confirm).
						for u in union.S:
							nodeComponent[u] = union
						# Remaining neighbors are processed against the union.
						comp_of_lucky_node = union
		
				# Runs for every neighbor, seen before or not.
				graph.add_edge(lucky_node, nbor)
				num_of_edges += 1		

		# Keep only live components; a union that was itself merged again in
		# this round is inactive and dropped here.
		components = [c for c in components + next_components if c.is_active()]

		if api_calls >= schedule:	
			
			# Next checkpoint: double until 8192 is reached, then advance in
			# fixed steps of 8192.
			if schedule >= 8192:
				schedule += 8192
			else:
				schedule *= schedule_multiplier
	
			# Both lists are seeded with 1, so max() never sees an empty list
			# and upper_bound below is at least 1 (no division by zero).
			best_with = [1]
			best_ever = [1]
	
			# Collect each component's partial solution, if it has one.
			# NOTE(review): partial_solution[0][0] and [1][0] are treated as
			# numeric scores; their exact meaning is defined by
			# Component.get_partial_solution — confirm there.
			for comp in components:
				partial_solution = comp.get_partial_solution( graph, query_nodes, a, samples)
				if partial_solution != None:
					best_with.append(partial_solution[0][0])
					best_ever.append(partial_solution[1][0])
	
			# current_best never decreases; upper_bound sums the positive
			# best_with scores across components.
			current_best = max(current_best, max(best_ever))
			upper_bound = sum([sc for sc in best_with if sc > 0])
			approx_ratio = 1.0 * current_best / upper_bound

			# Progress line on stderr, tab-separated: api_calls, number of
			# components, num_of_edges, approx_ratio.
			sys.stderr.write(str(api_calls) + '\t' + str(len(components)) + '\t' 
				+ str(num_of_edges) + '\t'
				+ str(approx_ratio) + '\n')
			# Followed by the component sizes, largest first.
			comp_sizes = [len(c.S) for c in components]
			comp_sizes.sort(reverse = True)
			sys.stderr.write(str(comp_sizes) + '\n')

	return (graph, api_calls)