Example #1
	def get_best_actions(self, sess, state_per_transition, actionable_nodes_per_transition, actions_vector):
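		"""Greedy action selection over a batch of transitions.

		For each transition's state, run the model's masked prediction op to score
		every actionable node under each action (0 = CPU, 1 = GPU, per the TODO
		below), then return the best Action and its predicted reward for that
		transition. Ties between the two devices are broken at random.
		"""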

		logging.debug("There are " + str(len(state_per_transition)) + " states per transitions.")

		features_per_graph = [state.feature_matrix for state in state_per_transition]
		nodes_mask_per_graph = list(actionable_nodes_per_transition)

		logging.debug("Getting best actions")	
		feed = construct_masked_feed_dict(self.placeholders, features_per_graph, self.support, FLAGS.num_simultaneous_graphs, len(actions_vector), nodes_mask_per_graph=nodes_mask_per_graph)

		rewards = sess.run([self.model.masked_prediction_op],feed_dict=feed)[0]
		
		best_future_action_per_transition = []
		best_future_reward_per_transition = []

		# TODO should be general to output_dim, not just cpu/gpu
		num_nodes_analyzed = 0
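		# rewards stacks the per-actionable-node predictions of every graph in order,
		# so num_nodes_analyzed tracks our offset into it as we iterate over the transitions.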
		for graph_idx in range(len(state_per_transition)):

			rewards_for_transition = rewards[num_nodes_analyzed:num_nodes_analyzed+len(actionable_nodes_per_transition[graph_idx])]

			# each of these vectors has length len(actionable_nodes_per_transition[graph_idx])
			cpu_rewards = rewards_for_transition[:, 0]
			gpu_rewards = rewards_for_transition[:, 1]

			max_cpu_reward = np.amax(cpu_rewards)
			max_gpu_reward = np.amax(gpu_rewards)

			best_cpu_index = np.random.choice(np.argwhere(cpu_rewards == max_cpu_reward).flatten(),1)[0]
			best_gpu_index = np.random.choice(np.argwhere(gpu_rewards == max_gpu_reward).flatten(),1)[0]

			action = Action()

			if max_cpu_reward == max_gpu_reward:
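				# best CPU and best GPU rewards tie: pick the device at random from actions_vector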

				allocation = np.random.choice(actions_vector,1)[0]
				
				action.node_idx = [actionable_nodes_per_transition[graph_idx][best_cpu_index], actionable_nodes_per_transition[graph_idx][best_gpu_index]][allocation]
				action.label = allocation
				best_reward = max_cpu_reward

			elif max_cpu_reward > max_gpu_reward:

				action.node_idx = actionable_nodes_per_transition[graph_idx][best_cpu_index]
				action.label = 0
				best_reward = max_cpu_reward

			elif max_cpu_reward < max_gpu_reward:

				action.node_idx = actionable_nodes_per_transition[graph_idx][best_gpu_index]
				action.label = 1
				best_reward = max_gpu_reward

			best_future_action_per_transition.append(action)
			best_future_reward_per_transition.append(best_reward)

			num_nodes_analyzed += len(actionable_nodes_per_transition[graph_idx])

		return best_future_action_per_transition, best_future_reward_per_transition

	def get_best_action(self, sess, state, actionable_nodes, actions_vector, sample_idx=0):
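		"""Sample a single action for one state.

		Runs the model's prediction op over the actionable nodes and samples a
		(node, allocation) pair from the resulting probability map, rather than
		taking the argmax as get_best_actions does. Returns the chosen Action and
		the probability it was sampled with.
		"""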

		if self.include_partial_solution:
			features_per_graph = [np.copy(state.feature_matrix)]
		else:
			features_per_graph = [state.feature_matrix[:, :self.num_features]]

		nodes_mask_per_graph = [1]
		
		if self.variable_support:
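			# restrict the graph's support (normalized adjacency) to the actioned and
			# actionable nodes by zeroing the rows and columns of every other node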
			all_nodes = np.arange(len(state.feature_matrix))
			actioned_or_actionable = np.concatenate((state.partial_solution_node_indexes, actionable_nodes))
			all_non_considered_nodes = np.setxor1d(all_nodes, actioned_or_actionable)
			constrained_adj = zero_rows(self.sparse_undirected_adj, all_non_considered_nodes)
			constrained_adj = zero_columns(constrained_adj, all_non_considered_nodes)
			constrained_support = preprocess_adj(constrained_adj.tocoo())
			support_per_graph = [constrained_support]
		else:
			support_per_graph = [self.sparse_constant_support]
		
		if self.zero_non_included_nodes:
			# zero the features of all nodes that are neither actioned nor actionable, so they have no effect on the nodes we care about during convolution
			all_nodes = np.arange(len(state.feature_matrix))
			actioned_or_actionable = np.concatenate((state.partial_solution_node_indexes, actionable_nodes))
			all_non_considered_nodes = np.setxor1d(all_nodes, actioned_or_actionable)
			features_per_graph[0][all_non_considered_nodes] = np.zeros(self.num_features, np.float32)
		
		feed = self.construct_masked_feed_dict(self.placeholders, features_per_graph, support_per_graph, FLAGS.num_simultaneous_graphs, len(actions_vector), nodes_mask_per_graph=nodes_mask_per_graph)

		#run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
		#run_metadata = tf.RunMetadata()

		#probabilities = sess.run([self.model.masked_prediction_op],feed_dict=feed, options=run_options, run_metadata=run_metadata)[0]
		#tl = timeline.Timeline(run_metadata.step_stats)
		#ctf = tl.generate_chrome_trace_format()
		#with open('get_best_action_timeline.json', 'w') as f:
		#	f.write(ctf)
		if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
			probabilities = sess.run([self.model.prediction_op,self.model.pred_print_ops],feed_dict=feed)[0]
		else:
			probabilities = sess.run([self.model.prediction_op],feed_dict=feed)[0]

		logging.info("Probability map across actions for sample " + str(sample_idx) + " was: " + str(":".join(list(map(str,probabilities)))))

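		# probabilities has shape (num actionable nodes, num actions); sample a flat index
		# in proportion to its probability and unravel it into a (node, allocation) pair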
		selected_node_action = np.random.choice(range(probabilities.size), p=probabilities.ravel()) 
		node_idx, allocation = np.unravel_index(selected_node_action, probabilities.shape) 

		action = Action()
		action.node_idx = actionable_nodes[node_idx]
		action.label = allocation

		return action, probabilities[node_idx][allocation]