Beispiel #1
0
    def __init__(self, env, size=None, update_delay=1.0, display=True, live_plot=True):
        """Set up simulator state, the optional PyGame GUI and the metrics reporter.

        Args:
            env: environment to simulate; its ``grid_size``, ``block_size`` and
                ``agent_states`` are read here.
            size: (width, height) of the window; when None it is computed as
                (grid dimension + 1) * block size along each axis.
            update_delay: duration between each environment step, in seconds.
            display: whether to try to open a PyGame window. Reset to False
                if PyGame cannot be imported or GUI set-up fails.
            live_plot: forwarded to the metrics ``Reporter``.
        """
        self.env = env
        if size is None:
            blocks_x = self.env.grid_size[0] + 1
            blocks_y = self.env.grid_size[1] + 1
            size = (blocks_x * self.env.block_size, blocks_y * self.env.block_size)
        self.size = size
        self.width, self.height = self.size

        # Drawing palette and geometry
        self.bg_color = self.colors['white']
        self.road_color = self.colors['black']
        self.road_width = 5

        # Run-loop bookkeeping
        self.quit = False
        self.start_time = None
        self.current_time = 0.0
        self.last_updated = 0.0
        self.update_delay = update_delay  # duration between each step (in secs)

        self.display = display
        if self.display:
            try:
                # Imported by name at runtime so a missing pygame only disables the GUI
                pygame = importlib.import_module('pygame')
                self.pygame = pygame
                pygame.init()
                self.screen = pygame.display.set_mode(self.size)

                # delay between GUI frames in ms (min: 1)
                self.frame_delay = max(1, int(self.update_delay * 1000))
                self.agent_sprite_size = (32, 32)
                self.agent_circle_radius = 10  # radius of circle, when using simple representation
                for car in self.env.agent_states:
                    sprite_file = os.path.join("images", "car-{}.png".format(car.color))
                    raw_image = pygame.image.load(sprite_file)
                    car._sprite = pygame.transform.smoothscale(raw_image, self.agent_sprite_size)
                    car._sprite_size = (car._sprite.get_width(), car._sprite.get_height())

                self.font = pygame.font.Font(None, 28)
                self.paused = False
            except ImportError as err:
                self.display = False
                print("Simulator.__init__(): Unable to import pygame; display disabled.\n{}: {}".format(err.__class__.__name__, err))
            except Exception as err:
                self.display = False
                print("Simulator.__init__(): Error initializing GUI objects; display disabled.\n{}: {}".format(err.__class__.__name__, err))

        # Setup metrics to report
        self.live_plot = live_plot
        tracked_metrics = ['net_reward', 'avg_net_reward', 'final_deadline', 'success']
        self.rep = Reporter(metrics=tracked_metrics, live_plot=self.live_plot)
        self.avg_net_reward_window = 30
    def __init__(self, env, size=None, update_delay=1.0, display=True, live_plot=False):
        """Initialise simulator state, the optional PyGame GUI and the metrics reporter.

        Args:
            env: environment to simulate; its ``grid_size``, ``block_size`` and
                ``agent_states`` are read here.
            size: (width, height) of the window; when None it is computed as
                (grid dimension + 1) * block size along each axis.
            update_delay: duration between each environment step, in seconds.
            display: whether to try to open a PyGame window. Silently reset to
                False when PyGame cannot be imported or GUI set-up fails.
            live_plot: forwarded to the metrics ``Reporter``.
        """
        self.env = env
        if size is not None:
            self.size = size
        else:
            block = self.env.block_size
            self.size = ((self.env.grid_size[0] + 1) * block,
                         (self.env.grid_size[1] + 1) * block)
        self.width, self.height = self.size

        self.bg_color = self.colors['white']
        self.road_color = self.colors['black']
        self.road_width = 5

        self.quit = False
        self.start_time = None
        self.current_time = self.last_updated = 0.0
        self.update_delay = update_delay  # duration between each step (in secs)

        self.display = display
        if self.display:
            try:
                gui = importlib.import_module('pygame')
                self.pygame = gui
                gui.init()
                self.screen = gui.display.set_mode(self.size)

                # delay between GUI frames in ms (min: 1)
                self.frame_delay = max(1, int(self.update_delay * 1000))
                self.agent_sprite_size = (32, 32)
                self.agent_circle_radius = 10  # radius of circle, when using simple representation
                for vehicle in self.env.agent_states:
                    image_path = os.path.join("images", "car-{}.png".format(vehicle.color))
                    loaded = gui.image.load(image_path)
                    vehicle._sprite = gui.transform.smoothscale(loaded, self.agent_sprite_size)
                    vehicle._sprite_size = (vehicle._sprite.get_width(), vehicle._sprite.get_height())

                self.font = gui.font.Font(None, 28)
                self.paused = False
            except Exception:
                # Covers both a failed pygame import (ImportError) and any error
                # while creating GUI objects; either way the display is switched
                # off without printing a diagnostic.
                self.display = False

        # Setup metrics to report
        self.live_plot = live_plot
        metric_names = ['net_reward', 'avg_net_reward', 'final_deadline', 'success']
        self.rep = Reporter(metrics=metric_names, live_plot=self.live_plot)
        self.avg_net_reward_window = 10
Beispiel #3
0
    def __init__(self, env):
        """Create the agent with default Q-learning parameters and an empty Q table.

        Args:
            env: environment passed to the base-class initializer; the route
                planner is then built from ``self.env``.
        """
        # Per the original author's note the base initializer sets self.env = env,
        # state = None, next_waypoint = None, and a default color.
        super(LearningAgent, self).__init__(env)
        self.color = 'red'  # override color
        self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint

        # Actions the agent can choose from (None is one of them)
        self.actions = [None, 'forward', 'left', 'right']

        # Parameters in the Q-Learning algorithm
        self.alpha = 0.5  # learning rate (alpha) 0 < alpha < 1
        self.gamma = 0.8  # discount factor (gamma) 0 < gamma < 1
        self.eps = 1  # exploration rate (epsilon)

        # Memory of the previous step (t-1): state, action and reward
        self.state_0 = self.action_0 = self.reward_0 = None

        self.Q = {}  # empty Q table
        self.experience = 1  # smartcab "experience", increases with time

        # Setup infraction metrics to report
        self.live_plot = False
        self.infractions_rep = Reporter(metrics=['invalid_moves'], live_plot=self.live_plot)
class Simulator(object):
    """Simulates agents in a dynamic smartcab environment.

    Uses PyGame to display GUI, if available. The implementation only relies
    on constructs that behave the same under Python 2 and Python 3.
    """

    # Named RGB colors used for drawing
    colors = {
        'black'   : (  0,   0,   0),
        'white'   : (255, 255, 255),
        'red'     : (255,   0,   0),
        'green'   : (  0, 255,   0),
        'blue'    : (  0,   0, 255),
        'cyan'    : (  0, 200, 200),
        'magenta' : (200,   0, 200),
        'yellow'  : (255, 255,   0),
        'orange'  : (255, 128,   0)
    }

    def __init__(self, env, size=None, update_delay=1.0, display=True, live_plot=False):
        """Set up simulator state, the optional PyGame GUI and the metrics reporter.

        Args:
            env: environment to simulate; its ``grid_size``, ``block_size`` and
                ``agent_states`` are read here.
            size: (width, height) of the window; when None it is computed as
                (grid dimension + 1) * block size along each axis.
            update_delay: duration between each environment step, in seconds.
            display: whether to try to open a PyGame window. Silently reset to
                False when PyGame cannot be imported or GUI set-up fails.
            live_plot: forwarded to the metrics ``Reporter``.
        """
        self.env = env
        self.size = size if size is not None else ((self.env.grid_size[0] + 1) * self.env.block_size, (self.env.grid_size[1] + 1) * self.env.block_size)
        self.width, self.height = self.size

        self.bg_color = self.colors['white']
        self.road_width = 5
        self.road_color = self.colors['black']

        self.quit = False
        self.start_time = None
        self.current_time = 0.0
        self.last_updated = 0.0
        self.update_delay = update_delay  # duration between each step (in secs)

        self.display = display
        if self.display:
            try:
                # Imported by name at runtime so a missing pygame only disables the GUI
                self.pygame = importlib.import_module('pygame')
                self.pygame.init()
                self.screen = self.pygame.display.set_mode(self.size)

                self.frame_delay = max(1, int(self.update_delay * 1000))  # delay between GUI frames in ms (min: 1)
                self.agent_sprite_size = (32, 32)
                self.agent_circle_radius = 10  # radius of circle, when using simple representation
                for agent in self.env.agent_states:
                    agent._sprite = self.pygame.transform.smoothscale(self.pygame.image.load(os.path.join("images", "car-{}.png".format(agent.color))), self.agent_sprite_size)
                    agent._sprite_size = (agent._sprite.get_width(), agent._sprite.get_height())

                self.font = self.pygame.font.Font(None, 28)
                self.paused = False
            except ImportError:
                # pygame could not be imported: run without a display, no message printed
                self.display = False
            except Exception:
                # Any other failure while creating GUI objects also disables the display quietly
                self.display = False

        # Setup metrics to report
        self.live_plot = live_plot
        self.rep = Reporter(metrics=['net_reward', 'avg_net_reward', 'final_deadline', 'success'], live_plot=self.live_plot)
        self.avg_net_reward_window = 10

    def run(self, n_trials=1):
        """Run up to ``n_trials`` trials and return statistics of the primary agent.

        Each trial resets the environment and steps it every ``update_delay``
        seconds until the environment reports ``done`` or the user quits
        (window close, Esc, or Ctrl+C). Space pauses the simulation when the
        display is active. After every completed trial the reward/deadline/
        success metrics are collected into ``self.rep``.

        Args:
            n_trials: maximum number of trials to run; 0 runs none.

        Returns:
            Tuple ``(n_dest_reached, last_dest_fail, sum_time_left,
            n_penalties, last_penalty, len(qvals))`` read from
            ``self.env.primary_agent`` after the last trial.
        """
        self.quit = False
        self.rep.reset()
        for trial in range(n_trials):
            print("Simulator.run(): Trial {}".format(trial))  # [debug]
            self.env.reset()
            self.current_time = 0.0
            self.last_updated = 0.0
            self.start_time = time.time()
            # Penalty count before the trial, to detect penalties incurred during it
            prev_penalty = self.env.primary_agent.n_penalties
            while True:
                try:
                    # Update current time
                    self.current_time = time.time() - self.start_time

                    # Handle GUI events
                    if self.display:
                        for event in self.pygame.event.get():
                            if event.type == self.pygame.QUIT:
                                self.quit = True
                            elif event.type == self.pygame.KEYDOWN:
                                if event.key == 27:  # Esc
                                    self.quit = True
                                elif event.unicode == u' ':
                                    self.paused = True

                        if self.paused:
                            self.pause()

                    # Update environment
                    if self.current_time - self.last_updated >= self.update_delay:
                        self.env.step()
                        self.last_updated = self.current_time

                    # Render GUI and sleep
                    if self.display:
                        self.render()
                        self.pygame.time.wait(self.frame_delay)
                except KeyboardInterrupt:
                    self.quit = True
                finally:
                    # NOTE: leaving a `finally` clause via `break` discards any
                    # exception that is propagating at that moment.
                    if self.quit or self.env.done:
                        state = self.env.agent_states[self.env.primary_agent]
                        # Record the (1-based) trial of the latest missed destination / penalty
                        if state['location'] != state['destination']:
                            self.env.primary_agent.last_dest_fail = trial + 1
                        if self.env.primary_agent.n_penalties > prev_penalty:
                            self.env.primary_agent.last_penalty = trial + 1
                        break

            if self.quit:
                break

            # Collect/update metrics
            self.rep.collect('net_reward', trial, self.env.trial_data['net_reward'])  # total reward obtained in this trial
            self.rep.collect('avg_net_reward', trial, np.mean(self.rep.metrics['net_reward'].ydata[-self.avg_net_reward_window:]))  # rolling mean of reward
            self.rep.collect('final_deadline', trial, self.env.trial_data['final_deadline'])  # final deadline value (time remaining)
            self.rep.collect('success', trial, self.env.trial_data['success'])
            if self.live_plot:
                self.rep.refresh_plot()  # autoscales axes, draws stuff and flushes events

        # Report final metrics
        if self.display:
            self.pygame.display.quit()  # need to shutdown pygame before showing metrics plot

        if self.live_plot:
            self.rep.show_plot()  # holds till user closes plot window

        # Looked up here rather than inside the loop so the summary is also
        # available when no trial was executed (n_trials == 0).
        p_agent = self.env.primary_agent
        return (p_agent.n_dest_reached, p_agent.last_dest_fail,
                p_agent.sum_time_left, p_agent.n_penalties,
                p_agent.last_penalty, len(p_agent.qvals))

    def render(self):
        """Draw roads, traffic lights, agents, destinations and status text, then flip buffers."""
        block = self.env.block_size

        # Clear screen
        self.screen.fill(self.bg_color)

        # Draw elements
        # * Static elements
        for road in self.env.roads:
            self.pygame.draw.line(self.screen, self.road_color, (road[0][0] * block, road[0][1] * block), (road[1][0] * block, road[1][1] * block), self.road_width)

        for intersection, traffic_light in self.env.intersections.items():
            center_x = intersection[0] * block
            center_y = intersection[1] * block
            self.pygame.draw.circle(self.screen, self.road_color, (center_x, center_y), 10)
            if traffic_light.state:  # North-South is open
                self.pygame.draw.line(self.screen, self.colors['green'],
                    (center_x, center_y - 15),
                    (center_x, center_y + 15), self.road_width)
            else:  # East-West is open
                self.pygame.draw.line(self.screen, self.colors['green'],
                    (center_x - 15, center_y),
                    (center_x + 15, center_y), self.road_width)

        # * Dynamic elements
        for agent, state in self.env.agent_states.items():
            heading = state['heading']
            # Pixel position of the intersection the agent is at
            location_px = (state['location'][0] * block, state['location'][1] * block)
            # Compute precise agent location here (back from the intersection some)
            agent_offset = (2 * heading[0] * self.agent_circle_radius, 2 * heading[1] * self.agent_circle_radius)
            agent_pos = (location_px[0] - agent_offset[0], location_px[1] - agent_offset[1])
            agent_color = self.colors[agent.color]
            if hasattr(agent, '_sprite') and agent._sprite is not None:
                # Draw agent sprite (image), properly rotated
                if heading == (1, 0):
                    rotated_sprite = agent._sprite
                else:
                    angle = 180 if heading[0] == -1 else heading[1] * -90
                    rotated_sprite = self.pygame.transform.rotate(agent._sprite, angle)
                self.screen.blit(rotated_sprite,
                    self.pygame.rect.Rect(agent_pos[0] - agent._sprite_size[0] // 2, agent_pos[1] - agent._sprite_size[1] // 2,
                        agent._sprite_size[0], agent._sprite_size[1]))
            else:
                # Draw simple agent (circle with a short line segment poking out to indicate heading)
                self.pygame.draw.circle(self.screen, agent_color, agent_pos, self.agent_circle_radius)
                # The segment ends at the intersection in pixel coordinates; the raw
                # grid location would point at the top-left corner of the window.
                self.pygame.draw.line(self.screen, agent_color, agent_pos, location_px, self.road_width)
            waypoint = agent.get_next_waypoint()
            if waypoint is not None:
                self.screen.blit(self.font.render(waypoint, True, agent_color, self.bg_color), (agent_pos[0] + 10, agent_pos[1] + 10))
            if state['destination'] is not None:
                dest_px = (state['destination'][0] * block, state['destination'][1] * block)
                self.pygame.draw.circle(self.screen, agent_color, dest_px, 6)
                self.pygame.draw.circle(self.screen, agent_color, dest_px, 15, 2)

        # * Overlays
        text_y = 10
        for text in self.env.status_text.split('\n'):
            self.screen.blit(self.font.render(text, True, self.colors['red'], self.bg_color), (100, text_y))
            text_y += 20

        # Flip buffers
        self.pygame.display.flip()

    def pause(self):
        """Block until a key is pressed, then resume without counting the paused time."""
        abs_pause_time = time.time()
        pause_text = "[PAUSED] Press any key to continue..."
        self.screen.blit(self.font.render(pause_text, True, self.colors['cyan'], self.bg_color), (100, self.height - 40))
        self.pygame.display.flip()
        while self.paused:
            for event in self.pygame.event.get():
                if event.type == self.pygame.KEYDOWN:
                    self.paused = False
            self.pygame.time.wait(self.frame_delay)
        # Paint over the pause message using the background color
        self.screen.blit(self.font.render(pause_text, True, self.bg_color, self.bg_color), (100, self.height - 40))
        # Shift the start time forward so the pause does not count as elapsed time
        self.start_time += (time.time() - abs_pause_time)
Beispiel #5
0
class Simulator(object):
    """Simulates agents in a dynamic smartcab environment.

    Uses PyGame to display GUI, if available. The implementation only relies
    on constructs that behave the same under Python 2 and Python 3.
    """

    # Named RGB colors used for drawing
    colors = {
        'black': (0, 0, 0),
        'white': (255, 255, 255),
        'red': (255, 0, 0),
        'green': (0, 255, 0),
        'blue': (0, 0, 255),
        'cyan': (0, 200, 200),
        'magenta': (200, 0, 200),
        'yellow': (255, 255, 0),
        'orange': (255, 128, 0)
    }

    def __init__(self,
                 env,
                 size=None,
                 update_delay=1.0,
                 display=True,
                 live_plot=False):
        """Set up simulator state, the optional PyGame GUI and the metrics reporter.

        Args:
            env: environment to simulate; its ``grid_size``, ``block_size`` and
                ``agent_states`` are read here.
            size: (width, height) of the window; when None it is computed as
                (grid dimension + 1) * block size along each axis.
            update_delay: duration between each environment step, in seconds.
            display: whether to try to open a PyGame window. Reset to False
                (with a printed message) when PyGame cannot be imported or
                GUI set-up fails.
            live_plot: forwarded to the metrics ``Reporter``.
        """
        self.env = env
        self.size = size if size is not None else (
            (self.env.grid_size[0] + 1) * self.env.block_size,
            (self.env.grid_size[1] + 1) * self.env.block_size)
        self.width, self.height = self.size

        self.bg_color = self.colors['white']
        self.road_width = 5
        self.road_color = self.colors['black']

        self.quit = False
        self.start_time = None
        self.current_time = 0.0
        self.last_updated = 0.0
        self.update_delay = update_delay  # duration between each step (in secs)

        self.display = display
        if self.display:
            try:
                # Imported by name at runtime so a missing pygame only disables the GUI
                self.pygame = importlib.import_module('pygame')
                self.pygame.init()
                self.screen = self.pygame.display.set_mode(self.size)

                # delay between GUI frames in ms (min: 1)
                self.frame_delay = max(1, int(self.update_delay * 1000))
                self.agent_sprite_size = (32, 32)
                self.agent_circle_radius = 10  # radius of circle, when using simple representation
                for agent in self.env.agent_states:
                    agent._sprite = self.pygame.transform.smoothscale(
                        self.pygame.image.load(
                            os.path.join("../images",
                                         "car-{}.png".format(agent.color))),
                        self.agent_sprite_size)
                    agent._sprite_size = (agent._sprite.get_width(),
                                          agent._sprite.get_height())

                self.font = self.pygame.font.Font(None, 28)
                self.paused = False
            except ImportError as e:
                self.display = False
                print("Simulator.__init__(): Unable to import pygame; display disabled.\n{}: {}".format(
                    e.__class__.__name__, e))
            except Exception as e:
                self.display = False
                print("Simulator.__init__(): Error initializing GUI objects; display disabled.\n{}: {}".format(
                    e.__class__.__name__, e))

        # Setup metrics to report
        self.live_plot = live_plot
        self.rep = Reporter(
            metrics=['net_reward', 'avg_net_reward', 'final_deadline', 'success'],
            live_plot=self.live_plot)
        self.avg_net_reward_window = 10

    def run(self, n_trials=1):
        """Run up to ``n_trials`` trials, collecting metrics after each completed one.

        Each trial resets the environment and steps it every ``update_delay``
        seconds until the environment reports ``done`` or the user quits
        (window close, Esc, or Ctrl+C). Space pauses the simulation when the
        display is active.

        Args:
            n_trials: maximum number of trials to run.
        """
        self.quit = False
        self.rep.reset()
        for trial in range(n_trials):
            print("Simulator.run(): Trial {}".format(trial))  # [debug]
            self.env.reset()
            self.current_time = 0.0
            self.last_updated = 0.0
            self.start_time = time.time()
            while True:
                try:
                    # Update current time
                    self.current_time = time.time() - self.start_time

                    # Handle GUI events
                    if self.display:
                        for event in self.pygame.event.get():
                            if event.type == self.pygame.QUIT:
                                self.quit = True
                            elif event.type == self.pygame.KEYDOWN:
                                if event.key == 27:  # Esc
                                    self.quit = True
                                elif event.unicode == u' ':
                                    self.paused = True

                        if self.paused:
                            self.pause()

                    # Update environment
                    if self.current_time - self.last_updated >= self.update_delay:
                        self.env.step()
                        self.last_updated = self.current_time

                    # Render GUI and sleep
                    if self.display:
                        self.render()
                        self.pygame.time.wait(self.frame_delay)
                except KeyboardInterrupt:
                    self.quit = True
                finally:
                    # NOTE: leaving a `finally` clause via `break` discards any
                    # exception that is propagating at that moment.
                    if self.quit or self.env.done:
                        break

            if self.quit:
                break

            # Collect/update metrics
            # total reward obtained in this trial
            self.rep.collect('net_reward', trial,
                             self.env.trial_data['net_reward'])
            # rolling mean of reward
            recent_rewards = self.rep.metrics['net_reward'].ydata[
                -self.avg_net_reward_window:]
            self.rep.collect('avg_net_reward', trial, np.mean(recent_rewards))
            # final deadline value (time remaining)
            self.rep.collect('final_deadline', trial,
                             self.env.trial_data['final_deadline'])
            self.rep.collect('success', trial, self.env.trial_data['success'])
            if self.live_plot:
                # autoscales axes, draws stuff and flushes events
                self.rep.refresh_plot()

        # Report final metrics
        if self.display:
            # need to shutdown pygame before showing metrics plot
            self.pygame.display.quit()

        if self.live_plot:
            self.rep.show_plot()  # holds till user closes plot window

    def render(self):
        """Draw roads, traffic lights, agents, destinations and status text, then flip buffers."""
        block = self.env.block_size

        # Clear screen
        self.screen.fill(self.bg_color)

        # Draw elements
        # * Static elements
        for road in self.env.roads:
            self.pygame.draw.line(self.screen, self.road_color,
                                  (road[0][0] * block, road[0][1] * block),
                                  (road[1][0] * block, road[1][1] * block),
                                  self.road_width)

        for intersection, traffic_light in self.env.intersections.items():
            center_x = intersection[0] * block
            center_y = intersection[1] * block
            self.pygame.draw.circle(self.screen, self.road_color,
                                    (center_x, center_y), 10)
            if traffic_light.state:  # North-South is open
                self.pygame.draw.line(
                    self.screen, self.colors['green'],
                    (center_x, center_y - 15),
                    (center_x, center_y + 15),
                    self.road_width)
            else:  # East-West is open
                self.pygame.draw.line(
                    self.screen, self.colors['green'],
                    (center_x - 15, center_y),
                    (center_x + 15, center_y),
                    self.road_width)

        # * Dynamic elements
        for agent, state in self.env.agent_states.items():
            heading = state['heading']
            # Pixel position of the intersection the agent is at
            location_px = (state['location'][0] * block,
                           state['location'][1] * block)
            # Compute precise agent location here (back from the intersection some)
            agent_offset = (2 * heading[0] * self.agent_circle_radius,
                            2 * heading[1] * self.agent_circle_radius)
            agent_pos = (location_px[0] - agent_offset[0],
                         location_px[1] - agent_offset[1])
            agent_color = self.colors[agent.color]
            if hasattr(agent, '_sprite') and agent._sprite is not None:
                # Draw agent sprite (image), properly rotated
                if heading == (1, 0):
                    rotated_sprite = agent._sprite
                else:
                    angle = 180 if heading[0] == -1 else heading[1] * -90
                    rotated_sprite = self.pygame.transform.rotate(
                        agent._sprite, angle)
                self.screen.blit(
                    rotated_sprite,
                    self.pygame.rect.Rect(
                        agent_pos[0] - agent._sprite_size[0] // 2,
                        agent_pos[1] - agent._sprite_size[1] // 2,
                        agent._sprite_size[0], agent._sprite_size[1]))
            else:
                # Draw simple agent (circle with a short line segment poking out to indicate heading)
                self.pygame.draw.circle(self.screen, agent_color, agent_pos,
                                        self.agent_circle_radius)
                # The segment ends at the intersection in pixel coordinates; the raw
                # grid location would point at the top-left corner of the window.
                self.pygame.draw.line(self.screen, agent_color, agent_pos,
                                      location_px, self.road_width)
            waypoint = agent.get_next_waypoint()
            if waypoint is not None:
                self.screen.blit(
                    self.font.render(waypoint, True, agent_color,
                                     self.bg_color),
                    (agent_pos[0] + 10, agent_pos[1] + 10))
            if state['destination'] is not None:
                dest_px = (state['destination'][0] * block,
                           state['destination'][1] * block)
                self.pygame.draw.circle(self.screen, agent_color, dest_px, 6)
                self.pygame.draw.circle(self.screen, agent_color, dest_px,
                                        15, 2)

        # * Overlays
        text_y = 10
        for text in self.env.status_text.split('\n'):
            self.screen.blit(
                self.font.render(text, True, self.colors['red'],
                                 self.bg_color), (100, text_y))
            text_y += 20

        # Flip buffers
        self.pygame.display.flip()

    def pause(self):
        """Block until a key is pressed, then resume without counting the paused time."""
        abs_pause_time = time.time()
        pause_text = "[PAUSED] Press any key to continue..."
        self.screen.blit(
            self.font.render(pause_text, True, self.colors['cyan'],
                             self.bg_color), (100, self.height - 40))
        self.pygame.display.flip()
        print(pause_text)  # [debug]
        while self.paused:
            for event in self.pygame.event.get():
                if event.type == self.pygame.KEYDOWN:
                    self.paused = False
            self.pygame.time.wait(self.frame_delay)
        # Paint over the pause message using the background color
        self.screen.blit(
            self.font.render(pause_text, True, self.bg_color, self.bg_color),
            (100, self.height - 40))
        # Shift the start time forward so the pause does not count as elapsed time
        self.start_time += (time.time() - abs_pause_time)
Beispiel #6
0
def run():
    """Run the agent for a finite number of trials over a grid of Q-learning parameters.

    For every (epsilon, gamma, alpha) combination the agent is configured,
    simulated ``n_sims`` times for ``n_trials`` trials each, and the mean
    trip-success and traffic-infraction percentages are collected. The
    resulting table and the best configurations are printed and plotted.
    """
    n_trials = 100

    # Setup metrics to report per simulation
    param_metrics = Reporter(metrics=['alpha', 'gamma', 'epsilon', 'trips_perf', 'infractions_perf'], live_plot=True)

    # Set up environment and agent
    e = Environment(num_dummies=3)  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(e, update_delay=0.001, display=False,
                    live_plot=True)  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    idx = 0
    eps_range = [0]  # e.g. np.arange(0, 0.2, 0.05) for a sweep
    gamma_range = [0.1]  # e.g. np.arange(0, 0.6, 0.1) for a sweep
    alpha_range = [0.4]  # e.g. np.arange(0.2, 0.6, 0.05) for a sweep
    for epsilon in eps_range:
        for gamma in gamma_range:
            for alpha in alpha_range:
                # Two spaces after the colon reproduce the output of the former
                # comma-separated print statement.
                print("Running simulation for:  {} {} {}".format(epsilon, gamma, alpha))
                # Run n_sims simulations for given parameters and store results in lists trips_perf and infractions_perf
                n_sims = 1
                trips_perf = []
                infractions_perf = []

                for _ in range(n_sims):
                    # Set agent parameters and reset experience
                    a.alpha = alpha
                    a.gamma = gamma  # discount factor read by LearningAgent.update()
                    a.eps = epsilon
                    a.experience = 1
                    a.infractions_rep.reset()

                    sim.run(n_trials=n_trials)  # run for a specified number of trials
                    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line

                    for metric in sim.rep.summary():
                        if metric.name == 'success':
                            # Percentage of trials that reached the destination
                            t_p = metric.sum() * 100 / n_trials
                            print("Name: {}, samples: {}, Performance: {}%".format(metric.name, len(metric), t_p))
                            trips_perf.append(t_p)

                    for metric in a.infractions_rep.summary():
                        if metric.name == 'invalid_moves':
                            # Percentage of steps whose reward was exactly -1.0
                            i_p = abs(metric[metric == -1.0].sum()) * 100 / metric.size
                            print("Name: {}, samples: {}, Performance: {}%".format(metric.name, len(metric), i_p))
                            infractions_perf.append(i_p)

                # Collect metrics
                param_metrics.collect('alpha', idx, alpha)
                param_metrics.collect('gamma', idx, gamma)
                param_metrics.collect('epsilon', idx, epsilon)
                param_metrics.collect('trips_perf', idx, pd.Series(trips_perf).mean())
                param_metrics.collect('infractions_perf', idx, pd.Series(infractions_perf).mean())
                idx += 1

    # Show results
    results = pd.DataFrame(param_metrics.summary()).transpose()
    print(results)
    print('Best configuration for trips performance:')
    print(results.loc[results['trips_perf'].idxmax()])
    print('Best configuration for traffic rules performance:')
    print(results.loc[results['infractions_perf'].idxmin()])
    param_metrics.refresh_plot()
    param_metrics.show_plot()
Beispiel #7
0
class LearningAgent(Agent):
    """An agent that learns to drive in the smartcab world.

    Implements tabular Q-learning: ``self.Q`` maps a state to a dict of
    per-action values, and each call to ``update`` applies the learning
    step for the previous (state, action, reward) using the value of the
    newly observed state.
    """

    def __init__(self, env):
        """Create the agent with default Q-learning parameters and an empty Q table.

        Args:
            env: environment passed to the base-class initializer; the route
                planner is then built from ``self.env``.
        """
        # Per the original author's note the base initializer sets self.env = env,
        # state = None, next_waypoint = None, and a default color.
        super(LearningAgent, self).__init__(env)
        self.color = 'red'  # override color
        self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
        self.actions = [None, 'forward', 'left', 'right']
        # Parameters in the Q - Learning algorithm
        self.alpha = 0.5  # learning rate (alpha) 0 < alpha < 1
        self.gamma = 0.8  # discount factor(gamma) 0 < gamma < 1
        self.eps = 1  # exploration rate(epsilon)
        self.action_0 = None  # Action in t-1
        self.reward_0 = None  # Reward in t-1
        self.state_0 = None  # State in t-1
        self.Q = {}  # Empty Q table
        self.experience = 1  # smartcab "experience", increases with time

        # Setup infraction metrics to report
        self.live_plot = False
        self.infractions_rep = Reporter(metrics=['invalid_moves'], live_plot=self.live_plot)

    def reset(self, destination=None):
        """Plan a route to ``destination`` and forget the previous step's memory."""
        self.planner.route_to(destination)
        # Clear the t-1 memory so the first step of the new trip performs no Q update
        self.action_0 = None
        self.reward_0 = None
        self.state_0 = None

    def update(self, t):
        """Sense, pick an action, act, and learn from the previous step.

        Args:
            t: step index, used as the x value when recording the reward in
                ``self.infractions_rep``.
        """
        # Alpha decay rate
        alpha_dr = self.experience
        # Eps decay rate; log(experience + 2) stays positive, so the division below is safe
        eps_dr = math.log(self.experience + 2)

        # Increase experience
        self.experience += 1

        # Gather inputs
        self.next_waypoint = self.planner.next_waypoint()  # from route planner, also displayed by simulator
        inputs = self.env.sense(self)
        # Currently unused by the policy; kept so the calls made to the environment are unchanged
        deadline = self.env.get_deadline(self)

        # Current state: sensed input values plus the planner's waypoint.
        # NOTE(review): relies on inputs.values() having a stable order between calls - confirm.
        self.state = (tuple(inputs.values()), self.next_waypoint)

        # Create zero-initialized state entry if current state doesn't exist in Q table
        if self.state not in self.Q:
            self.Q[self.state] = {action: 0.0 for action in self.actions}

        # Find actions with max q-values for current state and randomly pick one of them if more than one.
        q_row = self.Q[self.state]
        best_q = max(q_row.values())  # computed once instead of once per action
        best_actions = [act for act, q_value in q_row.items() if q_value == best_q]

        if self.eps / eps_dr < random.random():
            action = random.choice(best_actions)  # Select action using Q-table
        else:
            action = random.choice(self.actions)  # Select action randomly

        # Execute action and get reward
        reward = self.env.act(self, action)

        self.infractions_rep.collect('invalid_moves', t, reward)
        if self.live_plot:
            self.infractions_rep.refresh_plot()  # autoscales axes, draws stuff and flushes events

        # Learn with a one-step delay; skipped only on the first step of a trip.
        # The check is on state_0 alone because None is itself a valid action,
        # so testing action_0 for None would skip learning for that action.
        if self.state_0 is not None:
            rate = self.alpha / alpha_dr
            future_q = max(self.Q[self.state].values())
            self.Q[self.state_0][self.action_0] = (1 - rate) * \
                self.Q[self.state_0][self.action_0] + rate * \
                (self.reward_0 + self.gamma * future_q)

        # Save current state, action and reward for next step
        self.state_0 = self.state
        self.action_0 = action
        self.reward_0 = reward