Example #1
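This example builds a five-room maze MDP, runs the online RL algorithm on it, prints the learned value function and plots per-episode rewards, then runs the dynamic-programming solver on the same maze for comparison. mdps and learning_utils are project-local modules.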
import mdps
import learning_utils

def run():
    # Build the maze MDP (mdps and learning_utils are project-local modules).
    mdp = mdps.MazeMDP(room_size=5, num_rooms=5)
    print('online RL algorithm: ')
    total_rewards, V = simulate_online_RL_algorithm(mdp)
    mdp.print_v(V)                              # learned state-value function
    learning_utils.plot_rewards(total_rewards)  # per-episode reward curve
    print('DP algorithm: ')
    simulate_MDP_algorithm(mdp)                 # dynamic-programming baseline
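The source does not include simulate_online_RL_algorithm itself. As a minimal sketch of the contract the call above implies (a list of per-episode rewards plus a value function the MDP can print), here is tabular Q-learning against a hypothetical reset/get_actions/step interface; none of those method names are confirmed by the source:

import random
from collections import defaultdict

def simulate_online_RL_algorithm(mdp, num_episodes=500, max_iterations=100,
                                 alpha=0.1, gamma=0.9, epsilon=0.1):
    # Hypothetical sketch: tabular Q-learning. The mdp.reset(),
    # mdp.get_actions(), and mdp.step() calls are assumed, not from the source.
    Q = defaultdict(float)
    total_rewards = []
    for _ in range(num_episodes):
        state = mdp.reset()
        episode_reward = 0.0
        for _ in range(max_iterations):
            actions = mdp.get_actions(state)
            if random.random() < epsilon:
                action = random.choice(actions)                     # explore
            else:
                action = max(actions, key=lambda a: Q[(state, a)])  # exploit
            next_state, reward, done = mdp.step(state, action)
            best_next = max(Q[(next_state, a)]
                            for a in mdp.get_actions(next_state))
            Q[(state, action)] += alpha * (reward + gamma * best_next
                                           - Q[(state, action)])
            episode_reward += reward
            state = next_state
            if done:
                break
        total_rewards.append(episode_reward)
    # Collapse Q into state values for mdp.print_v.
    V = {s: max(Q[(s, a)] for a in mdp.get_actions(s))
         for s in {sa[0] for sa in Q}}
    return total_rewards, V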
Example #2
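This example trains the Keras-based variant on a smaller two-room maze, reports average reward and step counts over the second half of training, and prints the final trajectory and value function both raw and via the MDP's own printers.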
import numpy as np
import mdps
import learning_utils

def run_keras_nnet():
    mdp = mdps.MazeMDP(room_size=5, num_rooms=2)
    num_episodes = 200
    total_rewards, total_steps, trajectory, V = simulate_keras_online_RL_algorithm(
        mdp=mdp, num_episodes=num_episodes, max_iterations=100)
    # Average over the second half of the episodes, after learning has had
    # time to settle.
    print('average_reward: {}'.format(np.mean(total_rewards[num_episodes // 2:])))
    print('average_steps: {}'.format(np.mean(total_steps[num_episodes // 2:])))
    learning_utils.plot_rewards(total_rewards)
    learning_utils.plot_rewards(total_steps)  # same helper, reused for step counts
    print(trajectory)
    mdp.print_trajectory(trajectory)
    print(V)
    mdp.print_v(V)
Example #3
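This example runs the symbolic-graph network variant for 700 episodes and plots per-episode rewards and training losses with the same helper.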
import mdps
import learning_utils

def run_nnet():
    mdp = mdps.MazeMDP(room_size=5, num_rooms=2)
    total_rewards, total_losses = simulate_symbolic_online_RL_algorithm(
        mdp=mdp, num_episodes=700, max_iterations=100)
    learning_utils.plot_rewards(total_rewards)
    learning_utils.plot_rewards(total_losses)  # same helper, reused for losses
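learning_utils.plot_rewards is also not shown in the source. Given how it is called above (one sequence per call, reused for rewards, step counts, and losses), a minimal matplotlib sketch would be:

import matplotlib.pyplot as plt

def plot_rewards(values):
    # Generic per-episode line plot; the callers reuse this for rewards,
    # step counts, and losses, so the labels stay generic.
    plt.plot(values)
    plt.xlabel('episode')
    plt.ylabel('value')
    plt.show()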