def get_best_actions(self, sess, state_per_transition, actionable_nodes_per_transition, actions_vector):
    logging.debug("There are " + str(len(state_per_transition)) + " states (one per transition).")
    features_per_graph = [state.feature_matrix for state in state_per_transition]
    nodes_mask_per_graph = [actionable_node_indexes for actionable_node_indexes in actionable_nodes_per_transition]
    logging.debug("Getting best actions")
    feed = construct_masked_feed_dict(self.placeholders, features_per_graph, self.support,
                                      FLAGS.num_simultaneous_graphs, len(actions_vector),
                                      nodes_mask_per_graph=nodes_mask_per_graph)
    # One reward row per actionable node, one column per action label (cpu/gpu).
    rewards = sess.run([self.model.masked_prediction_op], feed_dict=feed)[0]
    best_future_action_per_transition = []
    best_future_reward_per_transition = []
    # TODO should be general to output_dim, not just cpu/gpu
    num_nodes_analyzed = 0
    for graph_idx in range(len(state_per_transition)):
        # Slice out the rewards for this transition's actionable nodes;
        # the slice has length len(actionable_nodes_per_transition[graph_idx]).
        rewards_for_transition = rewards[num_nodes_analyzed:num_nodes_analyzed + len(actionable_nodes_per_transition[graph_idx])]
        cpu_rewards = rewards_for_transition.transpose()[0].transpose()
        gpu_rewards = rewards_for_transition.transpose()[1].transpose()
        max_cpu_reward = np.amax(cpu_rewards)
        max_gpu_reward = np.amax(gpu_rewards)
        # Break ties between equally-rewarded nodes uniformly at random.
        best_cpu_index = np.random.choice(np.argwhere(cpu_rewards == max_cpu_reward).flatten(), 1)[0]
        best_gpu_index = np.random.choice(np.argwhere(gpu_rewards == max_gpu_reward).flatten(), 1)[0]
        action = Action()
        if max_cpu_reward == max_gpu_reward:
            # Both allocations tie, so pick the allocation (and hence the node) at random.
            allocation = np.random.choice(actions_vector, 1)[0]
            action.node_idx = [actionable_nodes_per_transition[graph_idx][best_cpu_index],
                               actionable_nodes_per_transition[graph_idx][best_gpu_index]][allocation]
            action.label = allocation
            best_reward = max_cpu_reward
        elif max_cpu_reward > max_gpu_reward:
            action.node_idx = actionable_nodes_per_transition[graph_idx][best_cpu_index]
            action.label = 0
            best_reward = max_cpu_reward
        else:
            action.node_idx = actionable_nodes_per_transition[graph_idx][best_gpu_index]
            action.label = 1
            best_reward = max_gpu_reward
        best_future_action_per_transition.append(action)
        best_future_reward_per_transition.append(best_reward)
        num_nodes_analyzed += len(actionable_nodes_per_transition[graph_idx])
    return best_future_action_per_transition, best_future_reward_per_transition
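# A minimal sketch (not from the original implementation) of how the per-transition
# argmax above could be generalised to an arbitrary number of action labels, as the
# TODO notes. The helper name _best_node_and_label and its standalone form are
# assumptions for illustration only.
def _best_node_and_label(rewards_for_transition, actionable_node_indexes):
    # rewards_for_transition: array of shape (num_actionable_nodes, output_dim).
    max_reward = np.amax(rewards_for_transition)
    # Collect every (node_position, label) pair achieving the maximum and break ties uniformly.
    candidates = np.argwhere(rewards_for_transition == max_reward)
    node_pos, label = candidates[np.random.choice(len(candidates))]
    return actionable_node_indexes[node_pos], int(label), max_reward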
def get_best_action(self, sess, state, actionable_nodes, actions_vector, sample_idx=0):
    if self.include_partial_solution:
        features_per_graph = [np.copy(state.feature_matrix)]
    else:
        # Keep only the first num_features feature columns, dropping the partial-solution columns.
        features_per_graph = [state.feature_matrix.transpose()[:self.num_features].transpose()]
    nodes_mask_per_graph = [1]
    if self.variable_support:
        # Restrict the support to nodes that are already actioned or currently actionable by
        # zeroing the adjacency rows/columns of every other node, then re-normalising.
        all_nodes = np.arange(len(state.feature_matrix))
        actioned_or_actionable = np.concatenate((state.partial_solution_node_indexes, actionable_nodes))
        all_non_considered_nodes = np.setxor1d(all_nodes, actioned_or_actionable)
        constrained_adj = sp.csr_matrix(self.undirected_adj)
        constrained_adj = zero_rows(self.sparse_undirected_adj, all_non_considered_nodes)
        constrained_adj = zero_columns(constrained_adj, all_non_considered_nodes)
        constrained_support = preprocess_adj(constrained_adj.tocoo())
        support_per_graph = [constrained_support]
    else:
        support_per_graph = [self.sparse_constant_support]
    if self.zero_non_included_nodes:
        # Zero the features of all non-actioned and non-actionable nodes so that they have
        # no effect on the nodes we care about during convolution.
        all_nodes = np.arange(len(state.feature_matrix))
        actioned_or_actionable = np.concatenate((state.partial_solution_node_indexes, actionable_nodes))
        all_non_considered_nodes = np.setxor1d(all_nodes, actioned_or_actionable)
        features_per_graph[0][all_non_considered_nodes] = np.zeros(self.num_features, np.float32)
    feed = self.construct_masked_feed_dict(self.placeholders, features_per_graph, support_per_graph,
                                           FLAGS.num_simultaneous_graphs, len(actions_vector),
                                           nodes_mask_per_graph=nodes_mask_per_graph)
    #run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    #run_metadata = tf.RunMetadata()
    #probabilities = sess.run([self.model.masked_prediction_op], feed_dict=feed, options=run_options, run_metadata=run_metadata)[0]
    #tl = timeline.Timeline(run_metadata.step_stats)
    #ctf = tl.generate_chrome_trace_format()
    #with open('get_best_action_timeline.json', 'w') as f:
    #    f.write(ctf)
    if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
        probabilities = sess.run([self.model.prediction_op, self.model.pred_print_ops], feed_dict=feed)[0]
    else:
        probabilities = sess.run([self.model.prediction_op], feed_dict=feed)[0]
    logging.info("Probability map across actions for sample " + str(sample_idx) + " was: " + ":".join(map(str, probabilities)))
    # Sample a (node, allocation) pair from the flattened probability map.
    selected_node_action = np.random.choice(range(probabilities.size), p=probabilities.ravel())
    node_idx, allocation = np.unravel_index(selected_node_action, probabilities.shape)
    action = Action()
    action.node_idx = actionable_nodes[node_idx]
    action.label = allocation
    return action, probabilities[node_idx][allocation]
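# A hedged sketch (not part of the original file) of what the zero_rows / zero_columns
# helpers called in get_best_action are assumed to do: blank out the adjacency entries of
# every node that is neither actioned nor actionable before the support is re-normalised.
# The "_sketch" names and exact signatures are assumptions inferred from the call sites;
# numpy (np) and scipy.sparse (sp) are assumed imported at module level, as the code above implies.
def zero_rows_sketch(adj_csr, row_indices):
    # Left-multiplying by a 0/1 diagonal matrix zeroes the selected rows.
    keep = np.ones(adj_csr.shape[0], dtype=adj_csr.dtype)
    keep[row_indices] = 0
    return sp.diags(keep).dot(adj_csr).tocsr()

def zero_columns_sketch(adj_csr, col_indices):
    # Right-multiplying by a 0/1 diagonal matrix zeroes the selected columns.
    keep = np.ones(adj_csr.shape[1], dtype=adj_csr.dtype)
    keep[col_indices] = 0
    return adj_csr.dot(sp.diags(keep)).tocsr()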