Example #2
class Tester():
    def __init__(self):             
        coins = generate_coins(testing_map.data)
        self.hero_position = generate_hero(testing_map.data)
        episodeSnapshot = EpisodeSnapshot('static/map/testing.json', coins, self.hero_position)

        self.game = Game(episodeSnapshot, True)
        self.env = Maze(episode_threshold=None)

    def on_coin_grabbed(self, maze_position):
        """Works as a callback when a coin is grabbed. A new one is generated based on this event.

        Args:
            maze_position (tuple): a position of the currently collected coin
        """
        coin = generate_coins(testing_map.data, count=1, grabbed_coin_position=maze_position)[0]

        self.game.append_coin(coin)
        self.env.update_reward_matrix()

    def test(self):
        """Testing process of the agent differs a bit from the training one. There isn't a explicit way of how to end the episode unless the agent steps out of the road, so coins are generated automatically in an infinite loop.
        """
        agent.load_pretrained_model()

        obs = self.env.reset(testing_map.data, self.hero_position, self.on_coin_grabbed)
        done = False
        actions = []
        reward_sum = 0

        visualization_done = False

        while not visualization_done:
            if not done:
                action = agent.choose_action(obs)
                next_obs, reward, done = self.env.step(action)
                reward_sum += reward

                obs = next_obs
                actions.append(action)

            if actions:
                visualization_done, _ = self.game.play(Move(actions.pop(0)))
            else:
                visualization_done, _ = self.game.play()

            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    visualization_done = True
                    done = True

        self.game.gameOver(reward_sum)
Example #3
def play(map_index):
    map_npy = 'mappe_test/map_' + map_index + '.npy'
    plt.grid(True)
    maze = np.load(map_npy)
    exit_cell = (30, 5)  #(37_27) 80_1 (30,5) 80_2 ....
    model_name = 'NN double augm prior 8 rays +  delta location ' + map_index
    while True:

        plt.imshow(maze, cmap="binary")
        plt.plot(exit_cell[0], exit_cell[1], "gs",
                 markersize=5)  # exit is a big green square
        plt.title(map_npy)
        plt.show()
        start_cell = tuple(int(x) for x in input('start cell: ').split())  #(20,28) (20,25) (14,5) (22,21) 80_1// (38,16) 80_2
        game = Maze(maze,
                    start_cell=start_cell,
                    exit_cell=exit_cell,
                    close_reward=-0.5)
        model = QReplayDoubleAugmPrior8(game, name=model_name, load=True)
        status, trajectory, time_elapsed = game.play(model,
                                                     start_cell=start_cell)
        game.render("moves")
        game.play(model, start_cell=start_cell)
        print('*******************************************')
        print('status = {}'.format(status))
        print('trajectory = {}'.format(trajectory))
        print('time elapsed = {} seconds'.format(time_elapsed))
        repeat = input('Type True to repeat: ')
        if repeat != "True":
            break
Example #4
def trial(robot: Agent) -> List[int]:
    maze = Maze()
    move_history = []
    for i in range(5000):
        if i % 1000 == 0:
            print(i)
        while not maze.is_complete():
            state, _ = maze.get_state_and_reward()
            action = robot.choose_action(state, maze.allowed_states[state])
            maze.update_maze(action)
            state, reward = maze.get_state_and_reward()
            robot.update_state_history(state, reward)
            if maze.steps > 1000:
                maze.robot_position = State(5, 5)
        robot.learn()
        move_history.append(maze.steps)
        maze.reset()
    return move_history
Example #5
def run_simulation(alpha, watch=False):
    maze = Maze(__ACTION_SPACE)
    robot = Agent(
        __ACTION_SPACE,
        maze.allowed_states,
        alpha=alpha,
        epsilon=__EPSILON
    )
    step_totals = []
    if watch:
        print("Starting simulation...")
        time.sleep(0.1)
    else:
        print("Beginning simulation with:")
        print(f"Robot <alpha: {robot.alpha}, epsilon: {robot.epsilon}>")
        print("Maze:")
        maze.print_maze()
        print("Starting episodes...")
    for i in range(__NUM_EPISODES):
        if i % 1000 == 0 and i > 0:
            print(f"{i} episodes completed...")
        while not maze.is_game_over():
            run_episode(maze, robot)
            if watch:
                print(f"Robot <alpha: {robot.alpha}, epsilon: {robot.epsilon}>")
                print(f"Episode {i}")
                maze.print_maze()
                print(f"Number of steps: {maze.num_steps}")
                time.sleep(0.020)
                os.system("clear")
        robot.learn()
        step_totals.append(maze.num_steps)
        # reset maze for next episode
        maze = Maze(__ACTION_SPACE)

    print("Simulation complete.")
    print("----------")
    return step_totals
Example #6
def train_interactive():
    regenerate = True
    maze_matrix = None

    while regenerate:
        maze_matrix = generate_prims_maze_matrix()
        environment_maze = Maze(maze_matrix)
        show(environment_maze)
        regenerate = input("Should regenerate?") == "y"
    if input("Should Train?") == "y":
        model = build_model(maze_matrix.size)
        weights_file = input("File to load from (h5)?")
        if weights_file:
            print("loading weights from file: %s" % (weights_file, ))
            model.load_weights(weights_file)
        train(model,
              environment_maze,
              epochs=1000,
              max_memory=8 * maze_matrix.size,
              data_size=32)
        save_model(model, input("Output filename?"))
Example #7
def train_recurring():
    file_name = "super_model.h5"
    first = True
    index = 0
    path = "TestTwo/"
    name = "TT"
    while True:
        sample_maze = generate_prims_maze_matrix(11, 11)
        environment_maze = Maze(sample_maze)
        show(environment_maze, file_name="%s%s%d" % (path, name, index))
        model = build_model(sample_maze.size)
        if not first:
            model.load_weights(file_name)
        first = False  # only skip loading on the very first iteration
        train(model,
              environment_maze,
              epochs=1000,
              max_memory=8 * sample_maze.size,
              data_size=32)
        save_model(model, name)
        index += 1
Example #8
def test_sarsa_lambda_with_epsilon_greedy():
    env = Maze()
    agent = SarsaLambdaAgentWithEpsilonGreedy(act_n=4)
    rs = sarsa_demo(env, agent, 2000)
    plt.plot(range(2000), rs)
    plt.grid()
    plt.show()
Example #9
def test_sarsa_lambda_with_ucb1():
    env = Maze()
    agent = SarsaLambdaAgentWithUCB1(act_n=4)
    rs = sarsa_demo(env, agent, 2000)
    plt.plot(range(2000), rs)
    plt.grid()
    plt.show()
Example #10
def test_sarsa():
    env = Maze()
    agent = SarsaAgent(act_n=4)
    rs = sarsa_demo(env, agent, 2000)
    plt.plot(range(2000), rs)
    plt.grid()
    plt.show()
Example #11
    navigation_map = ""
    line = ""
    for column in range(len(maze.content)):
        for row in range(len(maze.content[column])):
            if maze.content[row, column] == wall:
                line += ICONS["wall"] + " "
            else:
                # Determine the direction of the arrow
                state = (row, column)
                valid_next_states = maze.get_valid_next_states(state)
                best_next_state = get_best_next_state(Q, state,
                                                      valid_next_states)
                direction = get_direction(state, best_next_state)
                line += ICONS[direction] + " "

        navigation_map += line + "\n"
        line = ""
    return navigation_map


if __name__ == "__main__":
    raw_size = input("Size? ")
    size = int(raw_size) if raw_size else 11

    maze = Maze(generate_prims_maze_matrix(size, size))
    print(maze)
    model = build_model(maze.width, maze.height)
    train_for_random_start(model, maze)
    # test_all(model, maze)
    print(get_navigation_map(model, maze))
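The fragment above relies on helpers that are not shown (ICONS, get_best_next_state, get_direction). Purely as a hypothetical sketch, the direction lookup could work along these lines, assuming (row, column) states with the row index growing downward and an ICONS mapping that also contains arrow entries:

# Hypothetical sketch; the real ICONS and get_direction are not shown in the example.
ICONS = {"wall": "#", "up": "^", "down": "v", "left": "<", "right": ">"}

def get_direction(state, next_state):
    """Map a one-cell step from state to next_state onto an ICONS key.

    Assumes (row, column) tuples with the row index growing downward.
    """
    d_row = next_state[0] - state[0]
    d_col = next_state[1] - state[1]
    if d_row < 0:
        return "up"
    if d_row > 0:
        return "down"
    if d_col < 0:
        return "left"
    return "right"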
Example #12
        ['%', '%', '%', '%', '%', '%', '%', '%', '%', '%'],
        ['%', '%', '%', '%', '%', '%', '%', '%', '%', '%']
    ])
)

HIDDEN_SIZE = 256
BATCH_SIZE = 20
GAMMA = 0.98
PERCENTILE = 25
SEED = 1234
LEARNING_RATE = 0.003
EPISODES_THRESHOLD = 200
DESIRED_REWARD = 3.75
MAP_ITERATIONS = 256

env = Maze()
layer_sizes = [env.observations_count, HIDDEN_SIZE, env.actions_count]
agent = Agent(layer_sizes, SEED, LEARNING_RATE)

Episode = namedtuple('Episode', field_names=['reward', 'steps'])
EpisodeStep = namedtuple('EpisodeStep', field_names=['observation', 'action'])

def iterate_batches(maze, hero_position, epsilon):
    """The main purpose of the method is to iterate through individual episodes during the training process. Those are then returned in batch. 

    Args:
        maze (matrix): NumPy matrix representing map (environment)
        hero_position (tuple): current hero's position on the map
        epsilon (float): a scalar used for epsilon-greedy

    Yields:
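The snippet is cut off before the generator body. Purely as an illustrative sketch (not the project's actual code), a batch iterator in this style could look roughly like the following; it assumes a gym-like environment stand-in whose step() returns (observation, reward, done), an agent exposing choose_action(), and the Episode/EpisodeStep namedtuples defined above:

# Hedged sketch of an epsilon-greedy episode/batch generator; names and signatures
# are assumptions, not the original implementation.
import random
from collections import namedtuple

Episode = namedtuple('Episode', field_names=['reward', 'steps'])
EpisodeStep = namedtuple('EpisodeStep', field_names=['observation', 'action'])

def iterate_batches_sketch(env, agent, batch_size, epsilon):
    batch = []
    episode_reward = 0.0
    episode_steps = []
    obs = env.reset()
    while True:
        # epsilon-greedy: occasionally explore with a random action
        if random.random() < epsilon:
            action = random.randrange(env.actions_count)
        else:
            action = agent.choose_action(obs)
        next_obs, reward, done = env.step(action)
        episode_reward += reward
        episode_steps.append(EpisodeStep(observation=obs, action=action))
        if done:
            batch.append(Episode(reward=episode_reward, steps=episode_steps))
            episode_reward, episode_steps = 0.0, []
            next_obs = env.reset()
            if len(batch) == batch_size:
                yield batch
                batch = []
        obs = next_obs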
Example #13
def test_monte_carlo():
    env = Maze()
    agent = MonteCarloAgent(act_n=4)
    rs = monte_carlo_demo(env, agent, 2000)
    plt.plot(range(2000), rs)
    plt.grid()
    plt.show()
Example #14
import numpy as np
from environment import Maze
from agent import Agent
import matplotlib.pyplot as plt

if __name__ == '__main__':
    maze = Maze()
    robot = Agent(maze, alpha=0.1, randomFactor=0.25)
    moveHistory = []
    for i in range(5000):
        if i % 1000 == 0:
            print(i)
        while not maze.isGameOver():
            state, _ = maze.getStateAndReward()
            action = robot.chooseAction(state, maze.allowedStates[state])
            maze.updateMaze(action)
            state, reward = maze.getStateAndReward()
            robot.updateStateHistory(state, reward)
            if maze.steps > 1000:
                maze.robotPosition = (5, 5)
        robot.learn()
        moveHistory.append(maze.steps)
        maze = Maze()

    maze = Maze()
    robot = Agent(maze, alpha=0.99, randomFactor=0.25)
    moveHistory2 = []
    for i in range(5000):
        if i % 1000 == 0:
            print(i)
        while not maze.isGameOver():
Example #15
            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                break
            step += 1

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()

    #    RL = DeepQNetwork(env.n_actions, env.n_features,
    #                      learning_rate=0.01,
    #                      reward_decay=0.9,
    #                      e_greedy=0.9,
    #                      replace_target_iter=200,
    #                      memory_size=2000
    #                      )

    # param tuning by hand, best version for now
    RL = DeepQNetwork(env.n_actions,
                      env.n_features,
                      learning_rate=0.005,
                      reward_decay=0.8,
                      e_greedy=0.8,
Example #16
patient_number = '0866'
map_number = str(88)
map_index = str(1)
map_name = 'mappe_test/map_'+patient_number+'_'+map_number+'_'+map_index
maze = np.load(map_name+'.npy')
plotmap(map_name+'.npy')
load = False


if __name__ == '__main__':
    start_cell = tuple(int(x) for x in input('start cell: ').split())
    exit_cell = tuple(int(x) for x in input('exit cell: ').split())
    game = Maze(maze, start_cell=start_cell, exit_cell=exit_cell, close_reward = -0.5)

    if 0:  # train using a neural network with experience replay (also saves the resulting model)
        model = QReplayDoubleAugmPrior4(game, name = "NN double augm prior 4 rays")
        h, w, _, _ = model.train(discount=0.80, exploration_rate=0.60, episodes=700, max_memory=maze.size * 4)

    if 1:  # train using a neural network with experience replay (also saves the resulting model)
        model_name = "NN double augm prior 8 rays +  delta location "+ patient_number +'_'+map_number+'_'+map_index
        model = QReplayDoubleAugmPrior8(game, name = model_name, load = False)  
        h, w, _, _ = model.train(discount=0.80, exploration_rate=0.60, episodes=1600, max_memory=maze.size * 4) 

    if 0:  # train using a neural network with experience replay (also saves the resulting model)
        model = QReplayDoubleAugmPrior3(game, name = "NN double augm prior 3 rays +  delta location")  
        h, w, _, _ = model.train(discount=0.80, exploration_rate=0.60, episodes=300, max_memory=maze.size * 4)

    if 0:  # train using a neural network with experience replay (also saves the resulting model)
Example #17
                movements.append(n_movements)
                print('n_movements: {0}'.format(n_movements))
                break
    print('Game is finished!')
    print(agent.q_table)
    plot_rewards_movements()
    return


def plot_rewards_movements():
    plt.figure()
    plt.plot(list(range(EPISODE_COUNT)), movements)
    plt.xlabel('Episode')
    plt.ylabel('#Movements')
    plt.show()
    return


if __name__ == '__main__':
    environment = Maze(action_space=ACTION_SPACE,
                       pit_reward=PIT_REWARD,
                       destination_reward=DESTINATION_REWARD,
                       wall_reward=WALL_REWARD)
    agent = Agent(n_states=environment.width * environment.height,
                  action_space=ACTION_SPACE,
                  alpha=ALPHA,
                  gamma=GAMMA,
                  epsilon=EPSILON)
    environment.window.after(10, run_experiment)
    environment.window.mainloop()
Example #18
import logging

import matplotlib.pyplot as plt
import numpy as np

from environment import Maze
from models import *

logging.basicConfig(level=logging.INFO,
                    format="%(levelname)s: %(asctime)s: %(message)s",
                    datefmt="%H:%M:%S")

maze = np.array([[0, 1, 0, 0, 0, 0, 0, 0],
                 [0, 1, 0, 1, 0, 1, 0, 0],
                 [0, 0, 0, 1, 1, 0, 1, 0],
                 [0, 1, 0, 1, 0, 0, 0, 0],
                 [1, 0, 0, 1, 0, 1, 0, 0],
                 [0, 0, 0, 1, 0, 1, 1, 1],
                 [0, 1, 1, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 1, 0, 0]])  # 0 = free, 1 = occupied

game = Maze(maze)

if 0:  # only show the maze
    game.display = True
    game.reset()

if 0:  # play using random model
    model = RandomModel(game)
    model.train()

if 0:  # train using tabular Q-learning
    model = QTableModel(game)
    h, _, _ = model.train(discount=0.90,
                          exploration_rate=0.10,
                          learning_rate=0.10,
                          episodes=10000)
Example #19
def update():
    for episode in range(100):
        # initial observation
        observation = env.reset()

        while True:
            env.render()
            # RL choose action based on observation
            action = RL.choose_action(str(observation))

            # RL take action and get next observation and reward
            observation_, reward, done = env.step(action)

            # RL learn from this transition
            RL.learn(str(observation), action, reward, str(observation_))

            # swap observation
            observation = observation_
            if done:
                break

    # end of navigation
    print('Reinforcement Learning finished successfully')
    env.destroy()

if __name__ == "__main__":
    env = Maze()
    RL = QLearningTable(actions=list(range(env.n_actions)))

    env.after(100, update)
    env.mainloop()
Example #20
def test_single(model, maze_width=11):
    maze_matrix = generate_prims_maze_matrix(maze_width)
    maze = Maze(maze_matrix)
    initial_cell = random.choice(maze.free_cells)
    maze.reset(initial_cell)
    return play_game(model, maze, initial_cell), maze
Example #21
import numpy as np
from environment import Maze
from agent import Agent
from constants import maze_configuration

if __name__ == '__main__':
    maze = Maze(maze_configuration)
    robot = Agent(maze.allowed_states, alpha=0.1, exploration_factor=0.25)
    move_history = []
    robot.printRewardMap()

    for episode in range(5000):
        if episode % 1000 == 0:
            print(episode)
            robot.printRewardMap()

        while not maze.isGameOver():
            state, _ = maze.getStateAndReward()
            action = robot.chooseAction(state, maze.allowed_states[state])

            maze.updateMaze(action)

            state, reward = maze.getStateAndReward()
            robot.updateStateHistory(state, reward)
            if maze.steps > 1000:
                maze.robot_position = (5, 5)

        robot.learn()
        move_history.append(maze.steps)
        maze = Maze(maze_configuration)
Example #22
def test_q_learn():
    env = Maze()
    agent = QLearnAgent(act_n=4)
    rs = q_learn_demo(env, agent, 2000)
    plt.plot(range(2000), rs)
    plt.grid()
    plt.show()
Example #23
import logging

import matplotlib.pyplot as plt
import numpy as np

from environment import Maze
from models import *

logging.basicConfig(level=logging.INFO,
                    format="%(levelname)s: %(asctime)s: %(message)s",
                    datefmt="%H:%M:%S")

maze = np.array([[0, 1, 0, 0, 0, 0, 0, 0],
                 [0, 1, 0, 1, 0, 1, 0, 0],
                 [0, 0, 0, 1, 1, 0, 1, 0],
                 [0, 1, 0, 1, 0, 0, 0, 0],
                 [1, 0, 0, 1, 0, 1, 0, 0],
                 [0, 0, 0, 1, 0, 1, 1, 1],
                 [0, 1, 1, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 1, 0, 0]])  # 0 = free, 1 = occupied

game = Maze(maze)

if 0:  # only show the maze
    game.render("moves")
    game.reset()

if 0:  # play using random model
    model = RandomModel(game)
    model.train()

if 0:  # train using tabular Q-learning
    model = QTableModel(game, name="QTableModel")
    h, w, _, _ = model.train(discount=0.90,
                             exploration_rate=0.10,
                             learning_rate=0.10,
                             episodes=200)
Example #24
        while not maze.is_complete():
            state, _ = maze.get_state_and_reward()
            action = robot.choose_action(state, maze.allowed_states[state])
            maze.update_maze(action)
            state, reward = maze.get_state_and_reward()
            robot.update_state_history(state, reward)
            if maze.steps > 1000:
                maze.robot_position = State(5, 5)
        robot.learn()
        move_history.append(maze.steps)
        maze.reset()
    return move_history


if __name__ == '__main__':
    allowed_states = Maze().allowed_states

    robot_1 = Agent(allowed_states, alpha=0.1, random_factor=0.25)
    move_history_1 = trial(robot_1)

    robot_2 = Agent(allowed_states, alpha=0.99, random_factor=0.25)
    move_history_2 = trial(robot_2)

    plt.subplot(211)
    plt.semilogy(move_history_1, 'b-')
    plt.xlabel('episode')
    plt.ylabel('steps to solution')
    plt.legend(['alpha=0.1'])
    plt.subplot(212)
    plt.semilogy(move_history_2, 'r-')
    plt.xlabel('episode')