def __init__(self, env):
    """Initialise the agent's color, route planner, Q-table and counters.

    Args:
        env: Environment object; forwarded to the base-class initializer
            and to ``RoutePlanner``.
    """
    # The base initializer sets self.env = env, state = None,
    # next_waypoint = None, and a default color.
    super(LearningAgent, self).__init__(env)
    self.color = 'red'  # override the default color

    # Simple route planner used to get next_waypoint.
    self.planner = RoutePlanner(self.env, self)

    # alpha/gamma are presumably the learning rate and discount factor --
    # TODO confirm against QTable. The updater wraps this table instance.
    self.q_table = QTable(alpha=0.1, gamma=0.1)
    self.q_table_updater = QTableUpdater(self.q_table)

    # Running totals, stored as floats.
    self.total_actions = 0.0
    self.total_rewards = 0.0
class LearningAgent(Agent):
    """An agent that learns to drive in the smartcab world."""

    def __init__(self, env):
        """Initialise the agent's color, route planner, Q-table and counters.

        Args:
            env: Environment object; forwarded to the base-class initializer
                and to ``RoutePlanner``.
        """
        # The base initializer sets self.env = env, state = None,
        # next_waypoint = None, and a default color.
        super(LearningAgent, self).__init__(env)
        self.color = 'red'  # override the default color

        # Simple route planner used to get next_waypoint.
        self.planner = RoutePlanner(self.env, self)

        # alpha/gamma are presumably the learning rate and discount factor --
        # TODO confirm against QTable. The updater wraps this table instance.
        self.q_table = QTable(alpha=0.1, gamma=0.1)
        self.q_table_updater = QTableUpdater(self.q_table)

        # Running totals accumulated by update(), stored as floats.
        self.total_actions = 0.0
        self.total_rewards = 0.0

    def set_q_table(self, alpha=0.0, gamma=0.0):
        """Replace the Q-table (and its updater) with freshly built ones.

        Args:
            alpha: Forwarded to ``QTable`` as ``alpha``.
            gamma: Forwarded to ``QTable`` as ``gamma``.
        """
        self.q_table = QTable(alpha=alpha, gamma=gamma)
        # Rebuild the updater so it wraps the new table, not the old one.
        self.q_table_updater = QTableUpdater(self.q_table)

    def reset(self, destination=None):
        """Plan a route to ``destination`` for a new trip.

        Args:
            destination: Passed straight to ``self.planner.route_to``.
        """
        self.planner.route_to(destination)
        # TODO: Prepare for a new trip; reset any variables here, if required

    def update(self, t):
        """Sense, pick an action from the Q-table, act, and learn from it.

        Args:
            t: Not used by this method.
        """
        # Gather inputs.
        # next_waypoint comes from the route planner (also displayed by the
        # simulator).
        self.next_waypoint = self.planner.next_waypoint()
        inputs = self.env.sense(self)
        deadline = self.env.get_deadline(self)

        light = inputs['light']
        left = inputs['left']
        oncoming = inputs['oncoming']

        # Update state.
        self.state = 'light: {}, left: {}, oncoming: {}, next_waypoint: {}'.format(
            light, left, oncoming, self.next_waypoint)

        # Select action according to the policy held by the Q-table.
        action = self.q_table.best_action(
            light=light,
            next_waypoint=self.next_waypoint,
            left=left,
            oncoming=oncoming)

        # Execute action and get reward.
        reward = self.env.act(self, action)

        # Learn policy based on state, action, reward.
        self.q_table_updater.update(
            light=light,
            next_waypoint=self.next_waypoint,
            left=left,
            oncoming=oncoming,
            action=action,
            reward=reward)

        self.total_rewards += reward
        self.total_actions += 1.0

        # [debug] A single parenthesised argument prints identically under
        # the Python 2 print statement and the Python 3 print function.
        print("LearningAgent.update(): deadline = {}, inputs = {}, action = {}, reward = {}, next_waypoint = {}".format(deadline, inputs, action, reward, self.next_waypoint))

    def __init_q_table(self):
        """Replace ``self.q_table`` with an empty dict.

        NOTE(review): nothing in this class calls this method, and update()
        calls ``self.q_table.best_action``, which a plain dict does not
        provide -- looks like leftover code; confirm before using or removing.
        """
        self.q_table = {}

    def __positions(self):
        """Return the 48 1-based ``(i, j)`` pairs, i in 1..6 and j in 1..8.

        Pairs are ordered with ``i`` varying slowest. Presumably these are
        the grid cells of the environment -- TODO confirm.
        """
        return [(i + 1, j + 1) for i in range(6) for j in range(8)]
def set_q_table(self, alpha=0.0, gamma=0.0):
    """Replace the Q-table (and its updater) with freshly built ones.

    Args:
        alpha: Forwarded to ``QTable`` as ``alpha``.
        gamma: Forwarded to ``QTable`` as ``gamma``.
    """
    self.q_table = QTable(alpha=alpha, gamma=gamma)
    # Rebuild the updater so it wraps the new table, not the old one.
    self.q_table_updater = QTableUpdater(self.q_table)