Ejemplo n.º 1
0
    def run(self):
        """Run the game loop, then hold the game-over screen.

        While ``self._is_active`` is true, each frame redraws the screen,
        processes pending events, moves the paddles and ball, resolves
        collisions and waits on the clock. Afterwards the game-over screen
        is drawn and events are polled until the window is closed.

        Controls: W / S drive ``self._paddle1`` up / down, the up / down
        arrow keys drive ``self._paddle2``; releasing a key of a pair sets
        that paddle's velocity back to 0.

        A QUIT event calls ``quit_pygame()`` and returns from this method
        immediately.
        """
        clock = Clock()

        while self._is_active:
            # redraw screen
            self._redraw_screen()

            # check for events
            for event in get_event():
                # clicking quit button of window kills the game
                if event.type == QUIT:
                    quit_pygame()
                    # Leave right away: continuing the frame (move, collide,
                    # redraw, game-over draw) after shutdown would operate
                    # on a torn-down pygame (assumes quit_pygame is pygame's
                    # shutdown call - confirm against the file's imports).
                    return

                # pressing a keyboard button starts moving a paddle
                if event.type == KEYDOWN:
                    # W / S control the first paddle
                    if event.key == K_w:
                        self._paddle1.velocity = -MAX_VELOCITY
                    elif event.key == K_s:
                        self._paddle1.velocity = MAX_VELOCITY
                    # up / down arrows control the second paddle
                    if event.key == K_UP:
                        self._paddle2.velocity = -MAX_VELOCITY
                    elif event.key == K_DOWN:
                        self._paddle2.velocity = MAX_VELOCITY

                # releasing a keyboard button stops the matching paddle
                if event.type == KEYUP:
                    if event.key in (K_w, K_s):
                        self._paddle1.velocity = 0
                    if event.key in (K_UP, K_DOWN):
                        self._paddle2.velocity = 0

            # update coordinates of paddles and ball
            self._move_paddles_and_ball()

            # handle collisions of paddles with walls and of ball with
            # walls and paddles
            self._handle_wall_collision()
            self._handle_paddles_ball_collision()

            # cap the number of updates per second
            clock.tick(FRAMES_PER_SECOND)

        self._draw_game_over_screen()

        while pygame_is_active():
            # check for events
            for event in get_event():
                # clicking quit button of window kills the game
                if event.type == QUIT:
                    quit_pygame()
                    return

            # Throttle the idle loop so the game-over screen does not spin
            # the CPU while waiting for the window to be closed.
            clock.tick(FRAMES_PER_SECOND)
Ejemplo n.º 2
0
    def run(self):
        """Run the game loop, then hold the game-over screen.

        While ``self._n_lives`` is greater than 0, each frame redraws the
        screen, processes pending events, moves the paddle and ball,
        resolves collisions and waits on the clock. Afterwards the
        game-over screen is drawn and events are polled until the window
        is closed.

        Controls: the left / right arrow keys set the velocity of
        ``self._paddle``. This method only reacts to key presses; nothing
        here resets the velocity when a key is released.

        A QUIT event calls ``quit_pygame()`` and returns from this method
        immediately.
        """
        clock = Clock()

        while self._n_lives > 0:
            # redraw screen
            self._redraw_screen()

            # check for events
            for event in get_event():
                # clicking quit button of window kills the game
                if event.type == QUIT:
                    quit_pygame()
                    # Leave right away: continuing the frame (move, collide,
                    # redraw, game-over draw) after shutdown would operate
                    # on a torn-down pygame (assumes quit_pygame is pygame's
                    # shutdown call - confirm against the file's imports).
                    return

                # pressing a keyboard button starts moving the paddle
                if event.type == KEYDOWN:
                    if event.key == K_LEFT:
                        self._paddle.velocity = -MAX_VELOCITY
                    elif event.key == K_RIGHT:
                        self._paddle.velocity = MAX_VELOCITY

            # update coordinates of paddle and ball
            self._move_paddle_and_ball()

            # handle collisions of paddle with walls and of ball with
            # walls and paddle
            self._handle_wall_collision()
            self._handle_paddle_ball_collision()

            # cap the number of updates per second
            clock.tick(FRAMES_PER_SECOND)

        self._draw_game_over_screen()

        while pygame_is_active():
            # check for events
            for event in get_event():
                # clicking quit button of window kills the game
                if event.type == QUIT:
                    quit_pygame()
                    return

            # Throttle the idle loop so the game-over screen does not spin
            # the CPU while waiting for the window to be closed.
            clock.tick(FRAMES_PER_SECOND)
Ejemplo n.º 3
0
 def close_game(self):
     """Close the game by calling ``quit_pygame()``."""
     quit_pygame()
Ejemplo n.º 4
0
def simulate(maze: gym.envs,
             n_episodes,
             winning_streak=100,
             learning_rate=0.01,
             epsilon=0.3,
             decay=1.0,
             policy="EG",
             starting_value=0.0,
             discount_factor=0.99,
             display=False):
    """
    Run tabular Q-learning on ``maze`` and collect the per-step losses.

    An episode counts as a success when the maze is solved within
    ``prod(maze_size)`` steps; every episode is cut off after
    ``100 * prod(maze_size)`` steps. ``quit_pygame()`` is always called
    before this function returns or raises once the simulation has started.

    :param maze: gym.env
    :param n_episodes: Total # of episodes to attempt before giving up (must be considerably larger than winning_streak)
    :param winning_streak: <int> training stops once the # of successes in a row exceeds this value
    :param learning_rate: <float> step size of the Q(s,a) update
    :param epsilon: Parameter for exploration; also the initial value of every UCB entry when policy is "UCB"
    :param decay: Decay rate. With "ED" it multiplies epsilon and learning_rate after every step (both floored at 0.001);
                  with "UCB" it multiplies the UCB entry of the state/action just taken
    :param policy: "EG", "ED" or "UCB" for epsilon-greedy, decaying epsilon-greedy and UCB policies
    :param starting_value: initialization value for Q(s,a)
    :param discount_factor: Discount for rewards
    :param display: Whether to display the PyGame console
    :return: dict with keys
             "winning_episode": index of the episode at which the success streak exceeded winning_streak
                                (0 if that never happened),
             "losses": the loss (temporal-difference error) of every step of every episode,
             "avg_losses": average of each consecutive full group of 10 losses (a trailing partial group is dropped)
    :raises ValueError: if policy is not one of "EG", "ED", "UCB" (raised when the first action is selected)
    """
    maze_size = tuple((maze.observation_space.high +
                       np.ones(maze.observation_space.shape)).astype(int))
    maze_boundary = list(
        zip(maze.observation_space.low, maze.observation_space.high))

    # Hard cap on the number of steps in a single episode
    max_steps = np.prod(maze_size, dtype=int) * 100

    # If maze is solved with more steps than this, then FAIL
    max_steps_for_success = np.prod(maze_size, dtype=int)

    q_table = np.ones(maze_size +
                      (maze.action_space.n, ), dtype=float) * starting_value

    # For UCB method, we need to store upper confidence bounds
    if policy == "UCB":
        ucb_table = np.ones(maze_size +
                            (maze.action_space.n, ), dtype=float) * epsilon

    success_streak = 0  # Number of times we solved the maze (in a row)
    fail_streak = 0  # Number of times learning failed (in a row)

    # Simulation results
    losses = []
    # NOTE(review): `returns` is filled in but is not part of the result dict.
    returns = []
    winning_episode = 0

    # The whole run sits inside try/finally so that quit_pygame() is also
    # reached when the environment or a policy helper raises mid-training.
    try:
        if display:
            maze.render()

        for episode in range(n_episodes):

            # Reset the environment
            new_state = maze.reset()

            # the initial state
            state = bound_state(new_state, maze_boundary)
            total_reward = 0

            for t in range(max_steps):

                # Select an action
                # Using EPSILON GREEDY
                if policy == "EG":
                    action = select_action_eps_greedy(
                        action_space=maze.action_space,
                        q_values=q_table[state],
                        epsilon=epsilon)

                # Using DECAYING-EPSILON GREEDY
                elif policy == "ED":
                    action = select_action_eps_greedy(
                        action_space=maze.action_space,
                        q_values=q_table[state],
                        epsilon=epsilon)
                    epsilon = np.max([epsilon * decay, 0.001])
                    learning_rate = np.max([learning_rate * decay, 0.001])

                # Using UPPER CONFIDENCE BOUNDS
                elif policy == "UCB":
                    action = select_action_ucb(q_values=q_table[state],
                                               ucbs=ucb_table[state])
                    ucb_table[state][action] *= decay

                else:
                    # Without this branch an unknown policy would surface
                    # as an unbound `action` further down.
                    raise ValueError(
                        f"Unknown policy {policy!r}; "
                        "expected 'EG', 'ED' or 'UCB'")

                # Execute the action
                new_state, reward, solved, _ = maze.step(action)

                # Observe the reward
                new_state = bound_state(new_state, maze_boundary)
                total_reward += reward

                # Update Q(s,a)
                state_action = state + (action, )
                q_max = np.amax(q_table[new_state])
                loss = (reward + discount_factor * q_max
                        - q_table[state_action])
                q_table[state_action] += learning_rate * loss

                # For next iteration
                state = new_state
                losses.append(loss)

                # Render PyGame frame
                if display:
                    maze.render()

                # Update # of fails in a row
                if t == max_steps - 1:
                    fail_streak += 1

                if solved:
                    fail_streak = 0
                    returns.append(total_reward)

                    # Update # of successes in a row
                    if t <= max_steps_for_success:
                        success_streak += 1
                    else:
                        success_streak = 0
                    break

            # Conditions for Win / Loss
            # If <losing_streak> # of failures were achieved in a row
            # NOTE(review): `losing_streak` is neither a parameter nor a
            # local of this function; it has to exist at module level or
            # this line raises NameError - confirm.
            if fail_streak > losing_streak:
                # print(f"Failed {losing_streak} times in a row...")
                break

            # If <winning_streak> # of successes were achieved in a row
            if success_streak > winning_streak:
                winning_episode = episode
                break

        return {
            "winning_episode":
            winning_episode,
            "losses":
            losses,
            "avg_losses": [
                np.average(losses[i * 10:(i + 1) * 10])
                for i in range(len(losses) // 10)
            ],
        }

    finally:
        quit_pygame()
Ejemplo n.º 5
0
 def _handle(self, evt, *args, **kwargs):
     """Pass ``evt`` to the receipt callback, if one is set, then quit.

     ``self._on_receipt`` is called with ``evt`` and any extra positional
     and keyword arguments when it is not ``None``. ``quit_pygame()`` is
     called afterwards in either case (it is not reached if the callback
     raises).
     """
     # Identity check: `!= None` would go through a custom __ne__/__eq__
     # on the callback object, `is not None` never does.
     if self._on_receipt is not None:
         self._on_receipt(evt, *args, **kwargs)
     quit_pygame()