コード例 #1
0
    def test_each_boundary_within_default_env(self):
        """
        Check that moving into a boundary of the default environment leaves the agent in place.

        The agent follows a fixed sequence of actions that visits the top-left, top-right and
        bottom-right corners while avoiding the terminal state. Every step whose observation
        equals the previous observation is recorded as a "boundary hit"; the recorded step
        numbers must match the expected ones exactly (same values, same order, same length).
        """
        env = GridUniverseEnv()

        # Action indices index into env.action_descriptors: 0=up, 1=right, 2=down, 3=left
        actions_to_take = [3, 0, 1, 1, 1, 1, 2, 2, 3, 2, 2, 3, 3, 3]
        boundary_test_step_numbers = [0, 1, 5, 10, 13]
        collected_boundary_step_numbers = []
        prev_observation = env.reset()
        for step_no, action in enumerate(actions_to_take):
            env.render()
            print('go ' + env.action_descriptors[action])
            observation, reward, done, info = env.step(action)

            # An unchanged observation means the move was blocked by a boundary.
            if observation == prev_observation:
                collected_boundary_step_numbers.append(step_no)

            prev_observation = observation

        print('collected_boundary_steps:', collected_boundary_step_numbers)
        print('boundary_test_steps', boundary_test_step_numbers)
        # Compare the full lists: an element-wise zip() comparison would silently pass
        # when the collected list is empty or only a prefix of the expected one.
        self.assertEqual(collected_boundary_step_numbers, boundary_test_step_numbers)
コード例 #2
0
def run_monte_carlo_evaluation():
    """
    Evaluate a uniform-random policy with Monte Carlo, then act greedily on the result.

    Builds an 8x8 GridUniverseEnv with random_maze=True, runs one episode with the
    uniform policy, estimates its value function with every-visit Monte Carlo over
    30 episodes, derives a greedy policy from that estimate and finally follows the
    greedy policy for at most 500 steps while rendering graphically.
    """
    stars = '*' * 20
    print('\n' + stars +
          'Starting Monte Carlo evaluation and greedy policy' + stars +
          '\n')

    world_shape = (8, 8)
    env = GridUniverseEnv(world_shape, random_maze=True)

    # Uniform policy: one row per state, equal probability for each action.
    uniform_shape = [env.world.size, env.action_space.n]
    policy0 = np.ones(uniform_shape) / env.action_space.n

    print('Running an episode with a random agent (with initial policy)')
    _states_seen, _rewards_seen, _episode_done = run_episode(policy0, env)

    print('Starting Monte-Carlo evaluation of random policy')
    value0 = monte_carlo_evaluation(
        policy0, env, every_visit=True, num_episodes=30)
    print(value0)

    # Turn the estimated value function into a greedy policy and show it.
    policy1 = utils.greedy_policy_from_value_function(policy0, env, value0)
    print(policy1)

    policy_map = utils.get_policy_map(policy1, world_shape)
    print('Policy: (up, right, down, left)\n', policy_map)
    np.set_printoptions(linewidth=75, precision=8)

    print('Starting greedy policy episode')
    curr_state = env.reset()
    env.render_policy_arrows(policy1)

    max_steps = 500
    for steps_taken in range(1, max_steps + 1):
        env.render(mode='graphic')

        # Greedy choice: the action with the highest probability in this state.
        action = np.argmax(policy1[curr_state])
        print('go ' + env.action_descriptors[action])
        curr_state, reward, done, info = env.step(action)

        if not done:
            continue

        print('Terminal state found in {} steps'.format(steps_taken))
        env.render(mode='graphic')
        time.sleep(5)
        break
コード例 #3
0
def run_griduniverse_with_lava():
    """
    Run a random agent on an environment with lava.

    Builds a 10x10 GridUniverseEnv whose lava_states are 4, 14, ..., 74 and plays
    5 episodes of at most 100 steps each, sampling every action from
    env.action_space and rendering in 'graphic' mode. When an episode finishes,
    the step count and the reward of the final step are printed.
    """

    # The banner previously claimed this was the *default* GridUniverse (copy-paste
    # from the default demo); it now names the environment actually being run.
    print('\n' + '*' * 20 + 'Starting to run random agent on GridUniverse with lava' + '*' * 20 + '\n')
    env = GridUniverseEnv(grid_shape=(10, 10), lava_states=[4, 14, 24, 34, 44, 54, 64, 74])
    for i_episode in range(5):
        observation = env.reset()
        for t in range(100):
            env.render(mode='graphic')  # call env.render() without a mode for the ascii render
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)

            if done:
                print("Episode finished after {} timesteps".format(t + 1))
                print('Final states reward: ', reward)
                break
コード例 #4
0
def run_default_griduniverse():
    """
    Drive the default GridUniverseEnv with randomly sampled actions.

    Demonstrates the basic environment interface (reset / render / step) for a
    single episode of at most 100 steps, using the default render mode and
    printing the direction of every action taken.
    """
    stars = '*' * 20
    print('\n' + stars + 'Starting to run random agent on default GridUniverse' + stars + '\n')

    env = GridUniverseEnv()
    num_episodes = 1
    max_steps = 100

    for _episode in range(num_episodes):
        observation = env.reset()
        for step_index in range(max_steps):
            # Default render mode; pass mode='graphic' for the pyglet render.
            env.render()
            action = env.action_space.sample()
            print('go ' + env.action_descriptors[action])
            observation, reward, done, info = env.step(action)

            if not done:
                continue

            print("Episode finished after {} timesteps".format(step_index + 1))
            break
コード例 #5
0
def run_griduniverse_from_text_file():
    """
    Drive an environment loaded from an ascii text file with randomly sampled actions.

    The world is read from '../core/envs/maze_text_files/test_env.txt' via the
    custom_world_fp argument. See core/envs/maze_text_files for more examples, or
    the _create_custom_world_from_text() function within the environment.
    Runs a single episode of at most 1000 steps, rendered in 'graphic' mode.
    """
    stars = '*' * 20
    print('\n' + stars + 'Creating a pre-made GridUniverse from text file and running random agent on it' + stars + '\n')

    # Other worlds in the same directory: maze_21x21.txt, maze_101x101.txt
    env = GridUniverseEnv(custom_world_fp='../core/envs/maze_text_files/test_env.txt')

    num_episodes = 1
    max_steps = 1000
    for _episode in range(num_episodes):
        observation = env.reset()
        for step_index in range(max_steps):
            env.render(mode='graphic')
            action = env.action_space.sample()
            # Add a short time.sleep(...) here to watch the agent more slowly.
            observation, reward, done, info = env.step(action)
            if not done:
                continue
            print("Episode finished after {} timesteps".format(step_index + 1))
            break
コード例 #6
0
def run_random_maze():
    """
    Drive a randomly generated maze with randomly sampled actions.

    Builds an 11x11 GridUniverseEnv with random_maze=True (which asks the
    environment to generate maze walls for the requested shape) and runs a single
    episode of at most 1000 steps, rendered in 'graphic' mode.
    """
    stars = '*' * 20
    print('\n' + stars + 'Creating a random GridUniverse and running random agent on it' + stars + '\n')

    # Other shapes that have been tried: (101, 101), (49, 51), (51, 49)
    env = GridUniverseEnv(grid_shape=(11, 11), random_maze=True)

    num_episodes = 1
    max_steps = 1000
    for _episode in range(num_episodes):
        observation = env.reset()
        for step_index in range(max_steps):
            env.render(mode='graphic')
            # Publish the current step index on the environment before stepping.
            env.step_num = step_index
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)
            if not done:
                continue
            print("Episode finished after {} timesteps".format(step_index + 1))
            break
コード例 #7
0
from queue import Queue
# import queue

from core.envs.griduniverse_env import GridUniverseEnv

# todo: could rename file to path_finding.py

# Script entry point: repeatedly builds a random 15x15 maze and initialises the
# bookkeeping containers for a graph search over its states.
# NOTE(review): this excerpt ends inside commented-out code, so the search itself
# is not visible here; the banner text still mentions a "random agent".
if __name__ == '__main__':
    print('\n' + '*' * 20 +
          'Creating a random GridUniverse and running random agent on it' +
          '*' * 20 + '\n')

    first_time = True
    for i in range(10):
        # A fresh random maze for every iteration.
        env = GridUniverseEnv(grid_shape=(15, 15), random_maze=True)
        curr_state = initial_state = env.reset()

        # Four action indices; presumably up/right/down/left - confirm against the env.
        actions = range(4)

        all_valid_states = []

        # Empty containers for the search; nodes_and_edges appears intended to map a
        # state to its neighbouring states (see the commented-out draft below).
        visited = {}
        nodes_and_edges = {}

        # Reference: https://en.wikipedia.org/wiki/A*_search_algorithm

        # Draft neighbour-expansion helper, currently disabled:
        # def find_all_neighbouring_states(curr_state):
        #     nodes_and_edges[curr_state] = []
        #     for action in actions:
        #         # next_states = []
        #         next_state, reward, done = env.look_step_ahead(curr_state, action)
コード例 #8
0
def _uniform_random_policy(env):
    """Return a (num_states, num_actions) array giving every action equal probability."""
    num_actions = len(env.action_state_to_next_state)
    return np.ones([env.world.size, num_actions]) / num_actions


def _print_policy_maps(policy, world_shape):
    """
    Print the policy map of `policy` twice.

    The first print uses the current NumPy print options; the second uses a wider
    line width (150) and 4-digit precision. Afterwards the print options are set
    to linewidth=75, precision=8.
    """
    print('Policy: (0=up, 1=right, 2=down, 3=left)\n',
          utils.get_policy_map(policy, world_shape))
    np.set_printoptions(linewidth=75 * 2, precision=4)
    print('Policy: (up, right, down, left)\n',
          utils.get_policy_map(policy, world_shape))
    np.set_printoptions(linewidth=75, precision=8)


def run_policy_and_value_iteration():
    """
    Demonstrate policy evaluation, policy iteration and value iteration on a random maze.

    Most of the work is delegated to the `utils` and `dp` modules.
    This function does 5 things:

    1. Evaluate the value function of a random policy a number of times
    2. Create a greedy policy from this value function
    3. Run Policy Iteration
    4. Run Value Iteration
    5. Run an agent on the environment using the policy found by Value Iteration
    """
    print('\n' + '*' * 20 + 'Starting value and policy iteration' + '*' * 20 +
          '\n')

    # 1. Evaluate the value function of a random policy a number of times
    # Alternative 4x4 setups (use world_shape = (4, 4) with these):
    # env = GridUniverseEnv(grid_shape=world_shape, goal_states=[3, 12]) # Sutton and Barto/David Silver example
    # specific case with lava and a path through it
    # env = GridUniverseEnv(grid_shape=world_shape, lava_states=[i for i in range(15) if i not in [0, 4, 8, 12, 13, 14, 15]])
    world_shape = (11, 11)
    env = GridUniverseEnv(grid_shape=world_shape, random_maze=True)
    policy0 = _uniform_random_policy(env)
    # NOTE(review): v0 is reused as the starting value function for steps 3 and 4;
    # if the evaluation/iteration helpers modify their input in place, those steps
    # do not start from zeros - confirm against utils/dp.
    v0 = np.zeros(env.world.size)
    val_fun = v0
    for _ in range(500):
        val_fun = utils.single_step_policy_evaluation(policy0,
                                                      env,
                                                      value_function=val_fun)
    print(utils.reshape_as_griduniverse(val_fun, world_shape))

    # 2. Create a greedy policy from this value function
    policy1 = utils.greedy_policy_from_value_function(policy0, env, val_fun)
    _print_policy_maps(policy1, world_shape)

    # 3. Run Policy Iteration (starting again from a fresh uniform policy)
    print('Policy iteration:')
    policy0 = _uniform_random_policy(env)
    optimal_value, optimal_policy = dp.policy_iteration(policy0,
                                                        env,
                                                        v0,
                                                        threshold=0.001,
                                                        max_steps=1000)
    print('Value:\n', utils.reshape_as_griduniverse(optimal_value,
                                                    world_shape))
    _print_policy_maps(optimal_policy, world_shape)

    # 4. Run Value Iteration (starting again from a fresh uniform policy)
    print('Value iteration:')
    policy0 = _uniform_random_policy(env)
    optimal_value, optimal_policy = dp.value_iteration(policy0,
                                                       env,
                                                       v0,
                                                       threshold=0.001,
                                                       max_steps=100)
    print('Value:\n', utils.reshape_as_griduniverse(optimal_value,
                                                    world_shape))
    _print_policy_maps(optimal_policy, world_shape)

    # 5. Run agent on environment on policy found from Value Iteration
    print('Starting to run agent on environment with optimal policy')
    curr_state = env.reset()
    env.render_policy_arrows(optimal_policy)

    # Dynamic programming doesn't necessarily have the concept of an agent.
    # But you can create an agent to run on the environment using the found policy
    for t in range(100):
        env.render(mode='graphic')

        # Follow the policy greedily: take the most probable action in this state.
        action = np.argmax(optimal_policy[curr_state])
        print('go ' + env.action_descriptors[action])
        curr_state, reward, done, info = env.step(action)

        if done:
            print('Terminal state reached in {} steps'.format(t + 1))
            env.render(
                mode='graphic')  # must render here to see agent in final state
            time.sleep(6)
            env.render(close=True)
            break