class ArrowConstructionTransformTrainer(ArrowConstructionTransform):
    def __init__(self):
        # Initialize super.
        ArrowConstructionTransform.__init__(self)
        
        # Define training world.
        self.training_world = World(1, 1)
        self.reset_training_world()
    
    # Learn the Q-table.
    def learn_policy(self):
        # Initialize Q-learner.
        qlearner = QLearner(
            self.state_space,
            self.actions,
            self.handle_action,
            self.reset_training_world)
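
        # For reference, a tabular learner of this shape conventionally applies
        # the standard Q-update (a sketch of the usual rule; the alpha/gamma
        # names are assumptions, not part of this codebase's API):
        #
        #   Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))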
        
        # Initialize reward states.
        goal_states = [
            (0, self.state_space[1].index(World.ArrowState.Arrows_Complete)),
            (1, self.state_space[1].index(World.ArrowState.Arrows_Complete))]
        for goal_state in goal_states:
            qlearner.set_r_value(goal_state, 100)
        
        # Run Q-learner (the positional arguments appear to be the episode
        # count and the per-episode step limit).
        qlearner.execute(goal_states, 300, 30)
        
        # Return policy.
        return qlearner.get_policy()
    
    # Expands the reduced state into the training world.
    def expand_training_state(self, reduced_state):
        # Set world state based on reduced state.
        expanded_state = World.State.from_state(self.training_world.agent_state)
        expanded_state.arrows = self.state_space[1][reduced_state[1]]
        # The first reduced-state component acts as a has-materials flag.
        has_materials = self.state_space[0][reduced_state[0]]
        expanded_state.minerals = expanded_state._world.needed_minerals if has_materials else 0
        expanded_state.bamboo = expanded_state._world.needed_bamboo if has_materials else 0
        return expanded_state
    
    # Resets the training world.
    def reset_training_world(self):
        self.training_world.world_state[0][0] = World.SiteState.Useless
    
    # Handles an action.
    def handle_action(self, reduced_state, action_index):
        action = self.actions[action_index]
        expanded_state = self.expand_training_state(reduced_state)
        expanded_state = self.training_world.perform_action(expanded_state, action)
        new_reduced_state = self.reduce_state(expanded_state)
        return new_reduced_state
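
# Example usage (a sketch; assumes World and QLearner are importable wherever
# this module runs):
#
#   trainer = ArrowConstructionTransformTrainer()
#   policy = trainer.learn_policy()
#   # policy maps each reduced state to the index of the action to take there.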


class BambooTransformTrainer(BambooTransform):
    def __init__(self):
        # Initialize super.
        BambooTransform.__init__(self)
        
        # Define training world.
        self.training_world = World(1, 1)
        self.reset_training_world()
    
    # Learn the Q-table.
    def learn_policy(self):
        # Initialize Q-learner.
        qlearner = QLearner(
            self.state_space,
            self.actions,
            self.handle_action,
            self.reset_training_world)
        
        # Initialize reward states.
        goal_states = [
            (self.state_space[0].index(World.SiteState.Useless),),
            (self.state_space[0].index(World.SiteState.Bamboo_Planted),)]
        for goal_state in goal_states:
            qlearner.set_r_value(goal_state, 100)
        
        # Run Q-learner.
        qlearner.execute(goal_states, 250, 15)
        
        # Return policy.
        return qlearner.get_policy()
    
    # Expands the reduced state into the training world.
    def expand_training_state(self, reduced_state):
        # Set world state based on reduced state.
        expanded_state = World.State.from_state(self.training_world.agent_state)
        # The single training cell mirrors the reduced site state.
        self.training_world.world_state[0][0] = self.state_space[0][reduced_state[0]]
        return expanded_state
    
    # Resets the training world.
    def reset_training_world(self):
        self.training_world.world_state[0][0] = World.SiteState.Useless
    
    # Handles an action.
    def handle_action(self, reduced_state, action_index):
        action = self.actions[action_index]
        expanded_state = self.expand_training_state(reduced_state)
        expanded_state = self.training_world.perform_action(expanded_state, action)
        new_reduced_state = self.reduce_state(expanded_state)
        return new_reduced_state


class PositionTransformTrainer(PositionTransform):
    def __init__(self):
        # Initialize super.
        PositionTransform.__init__(self)

        # Define training world.
        self.training_world = World(3, 3)
        self.training_world.world_state[PositionTransform.VerticleState.At + 1][
            PositionTransform.HorizontalState.At + 1
        ] = World.SiteState.Mineral_Deposit

    # Learn the Q-table.
    def learn_policy(self):
        # Initialize Q-learner.
        qlearner = QLearner(self.state_space, self.actions, self.handle_action, self.reset_training_world)

        # Initialize reward states.
        goal_states = [(PositionTransform.HorizontalState.At + 1, PositionTransform.VerticleState.At + 1)]
        for goal_state in goal_states:
            qlearner.set_r_value(goal_state, 100)

        # Run Q-learner.
        qlearner.execute(goal_states, 300, 50)

        # Return policy.
        return qlearner.get_policy()

    # Expands the reduced state into the training world.
    def expand_training_state(self, reduced_state):
        # Set world state based on reduced state.
        expanded_state = World.State.from_state(self.training_world.agent_state)
        expanded_state.x = reduced_state[0]
        expanded_state.y = reduced_state[1]
        return expanded_state

    # Resets the training world.
    def reset_training_world(self):
        # Training world does not require a reset in this transformation.
        pass

    # Handles an action.
    def handle_action(self, reduced_state, action_index):
        action = self.actions[action_index]
        expanded_state = self.expand_training_state(reduced_state)
        expanded_state = self.training_world.perform_action(expanded_state, action)
        new_reduced_state = self.reduce_state(expanded_state)
        return new_reduced_state


class FullTransformTrainer(FullTransform):
    def __init__(self, cell_width, cell_height):
        # Initialize super.
        FullTransform.__init__(self, cell_width, cell_height)
        
        # Define training world.
        self.training_world = World(cell_width, cell_height)
        self.reset_training_world()
    
    # Learn the Q-table.
    def learn_policy(self):
        # Initialize Q-learner.
        qlearner = QLearner(
            self.state_space,
            self.actions,
            self.handle_action,
            self.reset_training_world)
        
        # Initialize goal states.
        goal_states = []
        print "Enumerating goal states..."
        print self.state_space_dim
        for state_index in xrange(qlearner.r_table.size):
            state = numpy.unravel_index(state_index, qlearner.r_table.shape)
            # Note: this comparison assumes ArrowState values double as
            # zero-based indices along the arrow dimension of the state space.
            if state[FullTransform.StateOffset.Arrows] == World.ArrowState.Arrows_Complete:
                goal_states.append(tuple(state))
        print "Goal states: %d" % len(goal_states)
        
        for goal_state in goal_states:
            qlearner.set_r_value(goal_state, 100)
        
        # Run Q-learner.
        print "Total states: %d" % (qlearner.r_table.size)
        qlearner.execute(goal_states, 500000, 50)
        
        # Return policy.
        return qlearner.get_policy()
    
    # Expands the reduced state into the training world.
    def expand_training_state(self, reduced_state):
        # Set world state based on reduced state.
        expanded_state = World.State.from_state(self.training_world.agent_state)
        world = expanded_state._world
        
        # Set world state. NOTE: the assignment assumes the first world_size
        # entries of the reduced state index the per-cell site states.
        for index in xrange(self.world_size):
            (x, y) = numpy.unravel_index(index, self.world_shape)
            world.world_state[y][x] = self.state_space[index][reduced_state[index]]
        
        # Set other elements.
        expanded_state.x = reduced_state[FullTransform.StateOffset.X]
        expanded_state.y = reduced_state[FullTransform.StateOffset.Y]
        expanded_state.minerals = 50 * reduced_state[FullTransform.StateOffset.Minerals]
        expanded_state.bamboo = 50 * reduced_state[FullTransform.StateOffset.Bamboo]
        expanded_state.arrows = reduced_state[FullTransform.StateOffset.Arrows]
            
        return expanded_state
    
    # Resets the training world.
    def reset_training_world(self):
        # No reset is needed: expand_training_state rewrites the full world
        # state on every call.
        pass
        
    # Handles an action.
    def handle_action(self, reduced_state, action_index):
        action = self.actions[action_index]
        expanded_state = self.expand_training_state(reduced_state)
        expanded_state = self.training_world.perform_action(expanded_state, action)
        new_reduced_state = self.reduce_state(expanded_state)
        return new_reduced_state
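

# Example training driver (a minimal sketch, not part of the original module:
# the 2x2 world passed to FullTransformTrainer is an arbitrary illustrative
# size, and full-transform training is by far the most expensive of the four).
if __name__ == "__main__":
    trainers = [
        ArrowConstructionTransformTrainer(),
        BambooTransformTrainer(),
        PositionTransformTrainer(),
        FullTransformTrainer(2, 2)]
    for trainer in trainers:
        print "Training %s..." % trainer.__class__.__name__
        policy = trainer.learn_policy()
        print "Policy: %s" % str(policy)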