import numpy as np

import learning_utils
import mdps


def run():
    mdp = mdps.MazeMDP(room_size=5, num_rooms=5)
    print('online RL algorithm: ')
    total_rewards, V = simulate_online_RL_algorithm(mdp)
    mdp.print_v(V)
    learning_utils.plot_rewards(total_rewards)
    print('DP algorithm: ')
    simulate_MDP_algorithm(mdp)
def run_keras_nnet():
    mdp = mdps.MazeMDP(room_size=5, num_rooms=2)
    num_episodes = 200
    total_rewards, total_steps, trajectory, V = simulate_keras_online_RL_algorithm(
        mdp=mdp, num_episodes=num_episodes, max_iterations=100)
    # Report averages over the second half of training, after the agent has
    # had a chance to improve. The original indexed a single element of
    # total_steps; a slice matching the rewards computation is intended.
    print('average_reward: {}'.format(np.mean(total_rewards[num_episodes // 2:])))
    print('average_steps: {}'.format(np.mean(total_steps[num_episodes // 2:])))
    learning_utils.plot_rewards(total_rewards)
    learning_utils.plot_rewards(total_steps)
    print(trajectory)
    mdp.print_trajectory(trajectory)
    print(V)
    mdp.print_v(V)
def run_nnet():
    mdp = mdps.MazeMDP(room_size=5, num_rooms=2)
    total_rewards, total_losses = simulate_symbolic_online_RL_algorithm(
        mdp=mdp, num_episodes=700, max_iterations=100)
    learning_utils.plot_rewards(total_rewards)
    learning_utils.plot_rewards(total_losses)
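
# A minimal entry point sketch, assuming this module is meant to be executed
# directly. The choice of run() as the default driver is an assumption; any
# of the run_* functions above could be substituted here.
if __name__ == '__main__':
    run()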