class ArrowConstructionTransformTrainer(ArrowConstructionTransform):
    """Trains a Q-learning policy for the arrow-construction transform.

    A minimal 1x1 training World is enough here: the reduced state space
    only tracks a resource flag and the arrow-construction progress.
    """

    def __init__(self):
        # Parent defines state_space / actions / reduce_state.
        ArrowConstructionTransform.__init__(self)
        # Single-cell world used purely as a training sandbox.
        self.training_world = World(1, 1)
        self.reset_training_world()

    def learn_policy(self):
        """Run Q-learning over the reduced state space and return the policy."""
        qlearner = QLearner(
            self.state_space,
            self.actions,
            self.handle_action,
            self.reset_training_world
        )
        # Reward reaching Arrows_Complete for either value of state
        # component 0 (the resource dimension).
        arrows_done = self.state_space[1].index(World.ArrowState.Arrows_Complete)
        goal_states = [(0, arrows_done), (1, arrows_done)]
        for goal_state in goal_states:
            qlearner.set_r_value(goal_state, 100)
        # 300 episodes of at most 30 steps each.
        qlearner.execute(goal_states, 300, 30)
        return qlearner.get_policy()

    def expand_training_state(self, reduced_state):
        """Expand a reduced state tuple into a full World.State for training."""
        expanded_state = World.State.from_state(self.training_world.agent_state)
        expanded_state.arrows = self.state_space[1][reduced_state[1]]
        # Component 0 is treated as a truthy "has resources" flag: grant
        # either all needed minerals/bamboo or none at all.
        has_resources = self.state_space[0][reduced_state[0]]
        expanded_state.minerals = expanded_state._world.needed_minerals if has_resources else 0
        expanded_state.bamboo = expanded_state._world.needed_bamboo if has_resources else 0
        return expanded_state

    def reset_training_world(self):
        """Reset the single training site back to Useless."""
        self.training_world.world_state[0][0] = World.SiteState.Useless

    def handle_action(self, reduced_state, action_index):
        """Apply one action in the training world; return the new reduced state."""
        action = self.actions[action_index]
        expanded_state = self.expand_training_state(reduced_state)
        expanded_state = self.training_world.perform_action(expanded_state, action)
        return self.reduce_state(expanded_state)
class BambooTransformTrainer(BambooTransform):
    """Trains a Q-learning policy for the bamboo-planting transform.

    Uses a 1x1 training World whose only varying state is the single
    site's SiteState.
    """

    def __init__(self):
        # Parent defines state_space / actions / reduce_state.
        BambooTransform.__init__(self)
        self.training_world = World(1, 1)
        self.reset_training_world()

    def learn_policy(self):
        """Run Q-learning over the site-state space and return the policy."""
        qlearner = QLearner(
            self.state_space,
            self.actions,
            self.handle_action,
            self.reset_training_world
        )
        # NOTE(review): Useless is also the reset state, yet it is rewarded
        # as a goal alongside Bamboo_Planted. Presumably a terminal
        # "nothing to do here" state -- confirm against the transform's
        # intended semantics.
        goal_states = [
            (self.state_space[0].index(World.SiteState.Useless),),
            (self.state_space[0].index(World.SiteState.Bamboo_Planted),)
        ]
        for goal_state in goal_states:
            qlearner.set_r_value(goal_state, 100)
        # 250 episodes of at most 15 steps each.
        qlearner.execute(goal_states, 250, 15)
        return qlearner.get_policy()

    def expand_training_state(self, reduced_state):
        """Expand a reduced state, writing the site state into the world."""
        expanded_state = World.State.from_state(self.training_world.agent_state)
        # The reduced state indexes the SiteState of the lone cell.
        self.training_world.world_state[0][0] = self.state_space[0][reduced_state[0]]
        return expanded_state

    def reset_training_world(self):
        """Reset the single training site back to Useless."""
        self.training_world.world_state[0][0] = World.SiteState.Useless

    def handle_action(self, reduced_state, action_index):
        """Apply one action in the training world; return the new reduced state."""
        action = self.actions[action_index]
        expanded_state = self.expand_training_state(reduced_state)
        expanded_state = self.training_world.perform_action(expanded_state, action)
        return self.reduce_state(expanded_state)
class PositionTransformTrainer(PositionTransform):
    """Trains a Q-learning policy for the agent-position transform.

    Uses a 3x3 training World with a mineral deposit at the centre cell
    so the (At, At) relative-position state has a concrete target.
    """

    def __init__(self):
        # Parent defines state_space / actions / reduce_state.
        PositionTransform.__init__(self)
        self.training_world = World(3, 3)
        # world_state is indexed [row][col]; place the deposit at the cell
        # corresponding to the (At, At) relative-position state.
        row = PositionTransform.VerticleState.At + 1
        col = PositionTransform.HorizontalState.At + 1
        self.training_world.world_state[row][col] = World.SiteState.Mineral_Deposit

    def learn_policy(self):
        """Run Q-learning over the position state space and return the policy."""
        qlearner = QLearner(
            self.state_space,
            self.actions,
            self.handle_action,
            self.reset_training_world
        )
        # Single goal: standing on the deposit, (x, y) == (At, At).
        goal_states = [
            (PositionTransform.HorizontalState.At + 1,
             PositionTransform.VerticleState.At + 1)
        ]
        for goal_state in goal_states:
            qlearner.set_r_value(goal_state, 100)
        # 300 episodes of at most 50 steps each.
        qlearner.execute(goal_states, 300, 50)
        return qlearner.get_policy()

    def expand_training_state(self, reduced_state):
        """Expand a reduced (x, y) position into a full World.State."""
        expanded_state = World.State.from_state(self.training_world.agent_state)
        expanded_state.x = reduced_state[0]
        expanded_state.y = reduced_state[1]
        return expanded_state

    def reset_training_world(self):
        """No-op: the 3x3 training world is static for this transform."""
        pass

    def handle_action(self, reduced_state, action_index):
        """Apply one action in the training world; return the new reduced state."""
        action = self.actions[action_index]
        expanded_state = self.expand_training_state(reduced_state)
        expanded_state = self.training_world.perform_action(expanded_state, action)
        return self.reduce_state(expanded_state)
class FullTransformTrainer(FullTransform): def __init__(self, cell_width, cell_height): # Initialize super. FullTransform.__init__(self, cell_width, cell_height) # Define training world. self.training_world = World(cell_width, cell_height) self.reset_training_world() # Learn the Q-table. def learn_policy(self): # Initialize Q-learner. qlearner = QLearner( \ self.state_space, self.actions, self.handle_action, self.reset_training_world ) # Initialize goal states. goal_states = [] print "Enumerating goal states..." print self.state_space_dim for state_index in xrange(qlearner.r_table.size): state = numpy.unravel_index(state_index, qlearner.r_table.shape) if state[FullTransform.StateOffset.Arrows] == World.ArrowState.Arrows_Complete: goal_states.append(tuple(state)) print "Goal states: %d" % len(goal_states) for goal_state in goal_states: qlearner.set_r_value( goal_state, 100 ) #print qlearner.r_table # Run Q-learner. print "Total states: %d" % (qlearner.r_table.size) qlearner.execute(goal_states, 500000, 50) # Return policy. return qlearner.get_policy() # Expands the reduced state into the training world. def expand_training_state(self, reduced_state): # Set world state based on reduced state. expanded_state = World.State.from_state(self.training_world.agent_state) world = expanded_state._world # Set world state. for index in xrange(self.world_size): (x, y) = numpy.unravel_index(index, self.world_shape) world.world_state[y][x] # Set other elements. expanded_state.x = reduced_state[FullTransform.StateOffset.X] expanded_state.y = reduced_state[FullTransform.StateOffset.Y] expanded_state.minerals = 50 * reduced_state[FullTransform.StateOffset.Minerals] expanded_state.bamboo = 50 * reduced_state[FullTransform.StateOffset.Bamboo] expanded_state.arrows = reduced_state[FullTransform.StateOffset.Arrows] return expanded_state # Resets the training world. def reset_training_world(self): pass # Handles an action. 
def handle_action(self, reduced_state, action_index): action = self.actions[action_index] expanded_state = self.expand_training_state(reduced_state) expanded_state = self.training_world.perform_action(expanded_state, action) new_reduced_state = self.reduce_state(expanded_state) return new_reduced_state