class LearningAgent(Agent):
    """An agent that learns to drive in the smartcab world."""

    def __init__(self, env):
        super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
        self.color = 'red'  # override color
        self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
        self.q_table = QTable(alpha=0.1, gamma=0.1)  # Q-table with learning rate alpha and discount factor gamma
        self.q_table_updater = QTableUpdater(self.q_table)  # performs the learning updates on the shared Q-table
        self.total_actions = 0.0  # running count of actions taken
        self.total_rewards = 0.0  # running sum of rewards received
        # self.last_occurence_of_punishment = 0.0

    def set_q_table(self, alpha=0.0, gamma=0.0):
        """Reinitialize the Q-table and its updater with new learning parameters."""
        self.q_table = QTable(alpha=alpha, gamma=gamma)
        self.q_table_updater = QTableUpdater(self.q_table)

    def reset(self, destination=None):
        self.planner.route_to(destination)
        # TODO: Prepare for a new trip; reset any variables here, if required

    def update(self, t):
        # Gather inputs
        self.next_waypoint = self.planner.next_waypoint()  # from route planner, also displayed by simulator

        inputs = self.env.sense(self)
        deadline = self.env.get_deadline(self)

        # Update state
        self.state = 'light: {}, left: {}, oncoming: {}, next_waypoint: {}'.format(inputs['light'],
                inputs['left'],
                inputs['oncoming'],
                self.next_waypoint)
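        # Example of the resulting state string (values depend on the current inputs):
        #   'light: red, left: None, oncoming: forward, next_waypoint: right'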

        # Select action according to the learned policy (Q-table lookup for the current state)
        action = self.q_table.best_action(light=inputs['light'],
                next_waypoint=self.next_waypoint,
                left=inputs['left'],
                oncoming=inputs['oncoming'])

        # Execute action and get reward
        reward = self.env.act(self, action)

        # Learn policy based on state, action, reward
        self.q_table_updater.update(light=inputs['light'],
                next_waypoint=self.next_waypoint,
                left=inputs['left'],
                oncoming=inputs['oncoming'],
                action=action,
                reward=reward)
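        # The updater is assumed to apply the standard Q-learning rule,
        #   Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a')),
        # using the alpha and gamma passed to QTable in __init__.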

        self.total_rewards += reward
        self.total_actions += 1.0

        print "LearningAgent.update(): deadline = {}, inputs = {}, action = {}, reward = {}, next_waypoint = {}".format(deadline, inputs, action, reward, self.next_waypoint)  # [debug]

    def __init_q_table(self):
        # Legacy helper, not referenced within this class as written: replaces
        # the Q-table with a plain dict, apparently left over from an earlier approach.
        self.q_table = {}

    def __positions(self):
        # Legacy helper, not referenced within this class as written: lists
        # every (i, j) position on a 6-by-8 grid, with both indices 1-based.
        positions_list = []
        for i in range(6):
            for j in range(8):
                positions_list.append((i + 1, j + 1))
        return positions_list
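

# Illustrative sketch only -- QTable and QTableUpdater are provided elsewhere in
# this project; the minimal stand-ins below merely show the interface the agent
# above assumes (a table keyed on light/next_waypoint/left/oncoming, a greedy
# best_action, and a Q-learning update driven by action and reward). Because
# update() is not handed the next state explicitly, this sketch caches the
# previous step and bootstraps from the state observed on the following call;
# the real classes may handle that differently.

_SKETCH_ACTIONS = (None, 'forward', 'left', 'right')


class _SketchQTable(object):
    def __init__(self, alpha=0.1, gamma=0.1):
        self.alpha = alpha   # learning rate
        self.gamma = gamma   # discount factor
        self.values = {}     # maps (state, action) -> Q-value, default 0.0

    @staticmethod
    def state_key(light, next_waypoint, left, oncoming):
        return (light, next_waypoint, left, oncoming)

    def get(self, state, action):
        return self.values.get((state, action), 0.0)

    def best_action(self, light, next_waypoint, left, oncoming):
        state = self.state_key(light, next_waypoint, left, oncoming)
        # Greedy choice; ties resolve to the earliest action in _SKETCH_ACTIONS.
        return max(_SKETCH_ACTIONS, key=lambda a: self.get(state, a))


class _SketchQTableUpdater(object):
    def __init__(self, q_table):
        self.q_table = q_table
        self.previous = None  # (state, action, reward) from the preceding step

    def update(self, light, next_waypoint, left, oncoming, action, reward):
        q = self.q_table
        state = q.state_key(light, next_waypoint, left, oncoming)
        if self.previous is not None:
            prev_state, prev_action, prev_reward = self.previous
            best_next = max(q.get(state, a) for a in _SKETCH_ACTIONS)
            old = q.get(prev_state, prev_action)
            # Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (r + gamma * max_a' Q(s', a'))
            q.values[(prev_state, prev_action)] = (
                (1 - q.alpha) * old + q.alpha * (prev_reward + q.gamma * best_next))
        self.previous = (state, action, reward)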