def get_best_action(self, sess, state, actionable_nodes, actions_vector, sample_idx=0):

		if self.include_partial_solution:
			features_per_graph = [np.copy(state.feature_matrix)]
		else:
			# slice to the raw features and copy so later in-place zeroing cannot mutate the caller's state
			features_per_graph = [np.copy(state.feature_matrix[:, :self.num_features])]

		nodes_mask_per_graph = [1]
		
		if self.variable_support:
			all_nodes = np.arange(len(state.feature_matrix))
			actioned_or_actionable = np.concatenate((state.partial_solution_node_indexes, actionable_nodes))
			all_non_considered_nodes = np.setxor1d(all_nodes, actioned_or_actionable)
			constrained_adj = zero_rows(self.sparse_undirected_adj, all_non_considered_nodes)
			constrained_adj = zero_columns(constrained_adj, all_non_considered_nodes)
			constrained_support = preprocess_adj(constrained_adj.tocoo())
			support_per_graph = [constrained_support]
		else:
			support_per_graph = [self.sparse_constant_support]
		
		if self.zero_non_included_nodes:
			# zero the features of all nodes that are neither actioned nor actionable, so they have no effect on the actioned nodes (that we care about) during convolution
			all_nodes = np.arange(len(state.feature_matrix))
			actioned_or_actionable = np.concatenate((state.partial_solution_node_indexes, actionable_nodes))
			all_non_considered_nodes = np.setxor1d(all_nodes, actioned_or_actionable)
			features_per_graph[0][all_non_considered_nodes] = np.zeros(self.num_features, np.float32)
		
		feed = self.construct_masked_feed_dict(self.placeholders, features_per_graph, support_per_graph, FLAGS.num_simultaneous_graphs, len(actions_vector), nodes_mask_per_graph=nodes_mask_per_graph)

		#run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
		#run_metadata = tf.RunMetadata()

		#probabilities = sess.run([self.model.masked_prediction_op],feed_dict=feed, options=run_options, run_metadata=run_metadata)[0]
		#tl = timeline.Timeline(run_metadata.step_stats)
		#ctf = tl.generate_chrome_trace_format()
		#with open('get_best_action_timeline.json', 'w') as f:
		#	f.write(ctf)
		if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
			probabilities = sess.run([self.model.prediction_op,self.model.pred_print_ops],feed_dict=feed)[0]
		else:
			probabilities = sess.run([self.model.prediction_op],feed_dict=feed)[0]

		logging.info("Probability map across actions for sample " + str(sample_idx) + " was: " + str(":".join(list(map(str,probabilities)))))

		selected_node_action = np.random.choice(range(probabilities.size), p=probabilities.ravel()) 
		node_idx, allocation = np.unravel_index(selected_node_action, probabilities.shape) 

		action = Action()
		action.node_idx = actionable_nodes[node_idx]
		action.label = allocation

		return action, probabilities[node_idx][allocation]
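
# Note: zero_rows and zero_columns are called above but not defined in this snippet.
# A minimal sketch of what they could look like, assuming they take a scipy.sparse
# matrix plus the row/column indices to clear and return a new CSR matrix:
import numpy as np
import scipy.sparse as sp

def zero_rows(mat, row_indices):
	# Left-multiplying by a diagonal 0/1 mask clears the selected rows.
	mask = np.ones(mat.shape[0], dtype=mat.dtype)
	mask[row_indices] = 0
	return sp.diags(mask).dot(mat).tocsr()

def zero_columns(mat, col_indices):
	# Right-multiplying by a diagonal 0/1 mask clears the selected columns.
	mask = np.ones(mat.shape[1], dtype=mat.dtype)
	mask[col_indices] = 0
	return mat.dot(sp.diags(mask)).tocsr()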
Example #2
	def get_best_actions(self, sess, state_per_transition, actionable_nodes_per_transition, actions_vector):

		logging.debug("There are " + str(len(state_per_transition)) + " states per transitions.")

		features_per_graph = [state.feature_matrix for state in state_per_transition]
		nodes_mask_per_graph = list(actionable_nodes_per_transition)

		logging.debug("Getting best actions")	
		feed = construct_masked_feed_dict(self.placeholders, features_per_graph, self.support, FLAGS.num_simultaneous_graphs, len(actions_vector), nodes_mask_per_graph=nodes_mask_per_graph)

		rewards = sess.run([self.model.masked_prediction_op],feed_dict=feed)[0]
		
		best_future_action_per_transition = []
		best_future_reward_per_transition = []

		# TODO should be general to output_dim, not just cpu/gpu
		num_nodes_analyzed = 0
		for graph_idx in range(len(state_per_transition)):

			rewards_for_transition = rewards[num_nodes_analyzed:num_nodes_analyzed+len(actionable_nodes_per_transition[graph_idx])]

			# length of this vector == len(actionable_nodes_per_transition[graph_idx])
			cpu_rewards = rewards_for_transition[:, 0]
			gpu_rewards = rewards_for_transition[:, 1]

			max_cpu_reward = np.amax(cpu_rewards)
			max_gpu_reward = np.amax(gpu_rewards)

			best_cpu_index = np.random.choice(np.argwhere(cpu_rewards == max_cpu_reward).flatten(),1)[0]
			best_gpu_index = np.random.choice(np.argwhere(gpu_rewards == max_gpu_reward).flatten(),1)[0]

			action = Action()

			if max_cpu_reward == max_gpu_reward:

				allocation = np.random.choice(actions_vector,1)[0]
				
				action.node_idx = [actionable_nodes_per_transition[graph_idx][best_cpu_index], actionable_nodes_per_transition[graph_idx][best_gpu_index]][allocation]
				action.label = allocation
				best_reward = max_cpu_reward

			elif max_cpu_reward > max_gpu_reward:

				action.node_idx = actionable_nodes_per_transition[graph_idx][best_cpu_index]
				action.label = 0
				best_reward = max_cpu_reward

			else:  # max_gpu_reward > max_cpu_reward

				action.node_idx = actionable_nodes_per_transition[graph_idx][best_gpu_index]
				action.label = 1
				best_reward = max_gpu_reward

			best_future_action_per_transition.append(action)
			best_future_reward_per_transition.append(best_reward)

			num_nodes_analyzed += len(actionable_nodes_per_transition[graph_idx])

		return best_future_action_per_transition, best_future_reward_per_transition
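
# Side note (not from the original snippet): the np.argwhere / np.random.choice
# pattern above breaks ties uniformly at random when several actionable nodes
# share the maximum predicted reward. A tiny standalone illustration:
import numpy as np

rewards = np.array([0.2, 0.9, 0.9, 0.1])
best_index = np.random.choice(np.argwhere(rewards == np.amax(rewards)).flatten(), 1)[0]
# best_index is 1 or 2, each with probability 0.5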
Example #3
    def construct_action(self, action_num):
        if self.demand_curve_shape == DemandCurveShape.RECIPROCAL:
            indices = np.unravel_index(action_num, (POSSIBLE_UNITS_PERSON.shape[0],
                POSSIBLE_PRICES_PERSON.shape[0], POSSIBLE_RECIP_DEMAND_PARAMS_PERSON.shape[0]))
            units = POSSIBLE_UNITS_PERSON[indices[0]]
            price = POSSIBLE_PRICES_PERSON[indices[1]]
            demand_curve = reciprocal(POSSIBLE_RECIP_DEMAND_PARAMS_PERSON[indices[2]])(NUM_GOODS_MAX_BUY)
        elif self.demand_curve_shape == DemandCurveShape.LINEAR:
            indices = np.unravel_index(action_num, (POSSIBLE_UNITS_PERSON.shape[0],
                POSSIBLE_PRICES_PERSON.shape[0], POSSIBLE_LIN_DEMAND_PARAMS_PERSON.shape[0]))
            units = POSSIBLE_UNITS_PERSON[indices[0]]
            price = POSSIBLE_PRICES_PERSON[indices[1]]
            demand_curve = linear(POSSIBLE_LIN_DEMAND_PARAMS_PERSON[indices[2]])(NUM_GOODS_MAX_BUY)
        return Action(price, units, demand_curve)
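
# Side note (not from the original snippet): construct_action decodes a flat action
# number into a (units, price, demand-parameter) index triple via np.unravel_index.
# A tiny standalone illustration with made-up grid sizes:
import numpy as np

shape = (3, 4, 5)  # e.g. 3 unit levels, 4 price levels, 5 demand-curve parameters
i, j, k = np.unravel_index(37, shape)
print(i, j, k)  # 1 3 2, since 37 == 1*4*5 + 3*5 + 2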
Example #4
    def get_action(self, model):
        if self.rltype == RLType.TRIVIAL:
            demand_curve = np.array([20])
            return Action(10, self.num_hours_to_work, demand_curve)

        if self.rltype == RLType.REINFORCE:
            # The state should be the model environment (Model)
            state_input = self.deconstruct_state(model)
            action_num = self.policy_net.choose_action(state_input)
            return self.construct_action(action_num)

        if self.rltype == RLType.Q_ACTOR_CRITIC:
            state_input = self.deconstruct_state(model)
            action_num = self.actor_critic.choose_action(state_input)
            return self.construct_action(action_num)
Example #5
    def __init__(self, map_file_name, curiosity=True):
        self.env = Environment(map_file_name)
        self.cat = Cat(("orange", (255, 165, 0)), 0, 0)
        self.mouse = Mouse(("gray", (128, 128, 128)), 0, 0)
        self.cheese = Cheese(("yellow", (255, 255, 0)), 0, 0)
        self.init_agents_position()
        self.action = Action()
        self.feed = 0
        self.eaten = 0
        self.age = 0
        self.ai = AI()
        pygame.init()
        self.size = 40
        self.screen = None
        self.activated = False
        self.curiosity = curiosity
Example #6
    def scoring(y_real, y_predicted):
        # score a candidate model by (roughly) the mean of the last column of its
        # predictions, i.e. the reward value appended to each action row
        return sum(y_predicted)[-1] / (len(y_predicted) - 1)

    for i in range(10):
        print('#' * 80)
        print(f'# GENERATION {i + 1}')
        print('#' * 80)
        x = np.array(walker.state_history[:-1])
        y = np.array([
            list(a) + [r]
            for a, r in zip(walker.action_history, walker.reward_history)
        ])

        walker.save_history(f'sillywalker{i+1}')
        model = TPOTRegressor(generations=5,
                              population_size=20,
                              scoring=scoring,
                              verbosity=2,
                              config_dict=regressor_config_dict_light)
        model.fit(x, y)
        for _ in range(10):
            while not walker.done:
                s = walker.state
                prediction = model.predict(np.array([s]))[0]
                print(prediction)

                action = Action(*prediction[:-1])
                walker.step(action)

            walker.reset()
Example #7
import numpy as np
from neupy import layers, storage, algorithms
from neupy.exceptions import StopTraining

from agent import SillyWalker, Action, create_net

net = create_net()

storage.load(
    net,
    'nets/net',
)

walker = SillyWalker()

while not walker.done:
    s = np.array([list(walker.state) + [1]])
    prediction = net.predict(s)[0]

    walker.step(Action(*prediction))

    walker._env.render()
Example #8
    def __init__(self, agent, w=0, h=0, nb_trashes=0):
        '''
        Initialize the environment

        :param agent: the agent to add in the environment
        :param w: width of the environment (not including walls)
        :param h: height of the environment (not including walls)
        :param nb_trashes: number of trashes in the environment
        '''

        self.width = self.default_parameters['width'] if w == 0 else w  # setting width
        self.height = self.default_parameters['height'] if h == 0 else h  # setting height

        self.obstacles = self.default_parameters['obstacles']  # set the obstacles

        # stuffs related to the Agent (action space, state space, the agent itself)
        self.action_space_n = Action.size()  # cardinality of action space
        self.state_space_n = (self.width + 1) * (self.height + 1)  # cardinality of state space: one state per agent position
        self.agent = agent  # add the agent to the environment

        # start throwing trashes around to get the agent a job
        self.nb_trashes = self.default_parameters['nb_trashes'] if nb_trashes == 0 else nb_trashes
        self.trashes = []  # all positions of trashes
        i = 0
        # seed with nb_trashes so each run produces the same trash layout
        random.seed(self.nb_trashes)
        while i < self.nb_trashes:
            x = random.randint(1, self.width)
            y = random.randint(1, self.height)

            # if newly generated position is not that of another trash / an obstacle / the initial position of the agent
            if (x, y) not in self.trashes and (
                    x, y) not in self.obstacles and (x, y) != agent.position:
                self.trashes.append((x, y))
                i += 1

        # for conversion between position and tile #
        # this will help when using Q_table #
        self.pairs = [(i, j) for i in range(self.width + 1)
                      for j in range(self.height + 1)]
        self.fig = figure(figsize=(7, 7))
        self.ax = self.fig.add_subplot(1, 1, 1)
        self.xticks = np.arange(-0.5, self.width + 0.5, 1)
        self.yticks = np.arange(-0.5, self.height + 0.5, 1)
        self.ax.grid()
        self.ax.set_xticks(self.xticks)
        self.ax.set_yticks(self.yticks)
        self.ax.plot(np.array(self.trashes)[:, 0],
                     np.array(self.trashes)[:, 1],
                     "co",
                     markersize=30,
                     alpha=0.2)
        self.ax.plot(np.array(self.obstacles)[:, 0],
                     np.array(self.obstacles)[:, 1],
                     "ks",
                     markersize=30,
                     alpha=0.4)
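
    # Side note (not part of the original class): the `self.pairs` list built above is
    # the position <-> tile lookup that the comments say is used with the Q-table.
    # Hypothetical helpers sketching that conversion:
    def position_to_tile(self, position):
        # Flat Q-table index of an (x, y) position.
        return self.pairs.index(position)

    def tile_to_position(self, tile):
        # Inverse mapping: Q-table index back to an (x, y) position.
        return self.pairs[tile]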